diff --git a/.gitattributes b/.gitattributes
index 1fdbb2d0eadda37e7de76fae915dd543e3126345..fb98bdb77b7adde38050c3532ead103551e5b8fa 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -101,3 +101,7 @@ phivenv/Lib/site-packages/sympy/tensor/__pycache__/tensor.cpython-39.pyc filter=lfs diff=lfs merge=lfs -text
 phivenv/Lib/site-packages/sympy/utilities/tests/__pycache__/test_wester.cpython-39.pyc filter=lfs diff=lfs merge=lfs -text
 phivenv/Lib/site-packages/tokenizers/tokenizers.pyd filter=lfs diff=lfs merge=lfs -text
 phivenv/Lib/site-packages/torch/bin/asmjit.dll filter=lfs diff=lfs merge=lfs -text
+phivenv/Lib/site-packages/torch/bin/fbgemm.dll filter=lfs diff=lfs merge=lfs -text
+phivenv/Lib/site-packages/torch/bin/protoc.exe filter=lfs diff=lfs merge=lfs -text
+phivenv/Lib/site-packages/torch/distributed/__pycache__/distributed_c10d.cpython-39.pyc filter=lfs diff=lfs merge=lfs -text
+phivenv/Lib/site-packages/torch/fx/experimental/__pycache__/symbolic_shapes.cpython-39.pyc filter=lfs diff=lfs merge=lfs -text
diff --git a/phivenv/Lib/site-packages/torch/bin/fbgemm.dll b/phivenv/Lib/site-packages/torch/bin/fbgemm.dll
new file mode 100644
index 0000000000000000000000000000000000000000..10f3d1d259134662dd6c0977f122e1b0db26d796
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/bin/fbgemm.dll
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc74454fddf405ed343fd8e95e2d288d374397c2d6b065b2e7620378d2733ec5
+size 5721600
diff --git a/phivenv/Lib/site-packages/torch/bin/protoc.exe b/phivenv/Lib/site-packages/torch/bin/protoc.exe
new file mode 100644
index 0000000000000000000000000000000000000000..dae84fc480b9be30d304fe9c399f001ab182ce42
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/bin/protoc.exe
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:014404109f3ff43bb35277afb59c576ed3102799a96cfd6e5999922c86e0285f
+size 2812416
diff --git a/phivenv/Lib/site-packages/torch/distributed/__pycache__/distributed_c10d.cpython-39.pyc b/phivenv/Lib/site-packages/torch/distributed/__pycache__/distributed_c10d.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..70e0ac057312ad545c91f8e97bc2ac491316a71d
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/distributed/__pycache__/distributed_c10d.cpython-39.pyc
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c3ea402fd0040e615c501575bc3e955363caa3b4e30dbed32da55e3031f4bd8
+size 165761
diff --git a/phivenv/Lib/site-packages/torch/fx/experimental/__pycache__/symbolic_shapes.cpython-39.pyc b/phivenv/Lib/site-packages/torch/fx/experimental/__pycache__/symbolic_shapes.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cca584288b5470b0fe5077c8b52b1fb8c0450a2c
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/fx/experimental/__pycache__/symbolic_shapes.cpython-39.pyc
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7cf373048053ebba942485805cb4369d49029e180f956adf3c84e8d600bd2a0
+size 193850
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_slow_conv2d_backward_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_slow_conv2d_backward_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..175475ee7f9274f145331dfe805d5966863504d0
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_slow_conv2d_backward_ops.h
@@ -0,0 +1,51 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <array>
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _slow_conv2d_backward_grad_input {
+  using schema = ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> (const at::Tensor &, const at::Tensor &, const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, at::Tensor &, at::Tensor &, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_slow_conv2d_backward";
+  static constexpr const char* overload_name = "grad_input";
+  static constexpr const char* schema_str = "_slow_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, SymInt[2] kernel_size, SymInt[2] stride, SymInt[2] padding, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))";
+  static ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> call(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, at::Tensor & grad_input, at::Tensor & grad_weight, at::Tensor & grad_bias);
+  static ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, at::Tensor & grad_input, at::Tensor & grad_weight, at::Tensor & grad_bias);
+};
+
+struct TORCH_API _slow_conv2d_backward_output_mask {
+  using schema = ::std::tuple<at::Tensor,at::Tensor,at::Tensor> (const at::Tensor &, const at::Tensor &, const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, ::std::array<bool,3>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_slow_conv2d_backward";
+  static constexpr const char* overload_name = "output_mask";
+  static constexpr const char* schema_str = "_slow_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, SymInt[2] kernel_size, SymInt[2] stride, SymInt[2] padding, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)";
+  static ::std::tuple<at::Tensor,at::Tensor,at::Tensor> call(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, ::std::array<bool,3> output_mask);
+  static ::std::tuple<at::Tensor,at::Tensor,at::Tensor> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, ::std::array<bool,3> output_mask);
+};
+
+struct TORCH_API _slow_conv2d_backward_output_mask_out {
+  using schema = ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> (const at::Tensor &, const at::Tensor &, const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, ::std::array<bool,3>, at::Tensor &, at::Tensor &, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_slow_conv2d_backward";
+  static constexpr const char* overload_name = "output_mask_out";
+  static constexpr const char* schema_str = "_slow_conv2d_backward.output_mask_out(Tensor grad_output, Tensor self, Tensor weight, SymInt[2] kernel_size, SymInt[2] stride, SymInt[2] padding, bool[3] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))";
+  static ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> call(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, ::std::array<bool,3> output_mask, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2);
+  static ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, ::std::array<bool,3> output_mask, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_slow_conv2d_forward.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_slow_conv2d_forward.h
new file mode 100644
index 0000000000000000000000000000000000000000..b898bd526ec85c995d6ba7249c88941a961ecd1a
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_slow_conv2d_forward.h
@@ -0,0 +1,92 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <optional>
+
+
+
+#include <ATen/ops/_slow_conv2d_forward_ops.h>
+
+namespace at {
+
+
+// aten::_slow_conv2d_forward.output(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, *, Tensor(a!) output) -> Tensor(a!)
+inline at::Tensor & _slow_conv2d_forward_out(at::Tensor & output, const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, at::IntArrayRef stride, at::IntArrayRef padding) {
+    return at::_ops::_slow_conv2d_forward_output::call(self, weight, c10::fromIntArrayRefSlow(kernel_size), bias, c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(padding), output);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor & _slow_conv2d_forward_out(at::Tensor & output, const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, at::IntArrayRef stride, at::IntArrayRef padding) {
+    return at::_ops::_slow_conv2d_forward_output::call(self, weight, c10::fromIntArrayRefSlow(kernel_size), bias, c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(padding), output);
+  }
+}
+
+// aten::_slow_conv2d_forward.output(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, *, Tensor(a!) output) -> Tensor(a!)
+inline at::Tensor & _slow_conv2d_forward_outf(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, at::IntArrayRef stride, at::IntArrayRef padding, at::Tensor & output) {
+    return at::_ops::_slow_conv2d_forward_output::call(self, weight, c10::fromIntArrayRefSlow(kernel_size), bias, c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(padding), output);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor & _slow_conv2d_forward_outf(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, at::IntArrayRef stride, at::IntArrayRef padding, at::Tensor & output) {
+    return at::_ops::_slow_conv2d_forward_output::call(self, weight, c10::fromIntArrayRefSlow(kernel_size), bias, c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(padding), output);
+  }
+}
+
+// aten::_slow_conv2d_forward.output(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, *, Tensor(a!) output) -> Tensor(a!)
+inline at::Tensor & _slow_conv2d_forward_symint_out(at::Tensor & output, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding) {
+    return at::_ops::_slow_conv2d_forward_output::call(self, weight, kernel_size, bias, stride, padding, output);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor & _slow_conv2d_forward_out(at::Tensor & output, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding) {
+    return at::_ops::_slow_conv2d_forward_output::call(self, weight, kernel_size, bias, stride, padding, output);
+  }
+}
+
+// aten::_slow_conv2d_forward.output(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, *, Tensor(a!) output) -> Tensor(a!)
+inline at::Tensor & _slow_conv2d_forward_symint_outf(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, at::Tensor & output) {
+    return at::_ops::_slow_conv2d_forward_output::call(self, weight, kernel_size, bias, stride, padding, output);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor & _slow_conv2d_forward_outf(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, at::Tensor & output) {
+    return at::_ops::_slow_conv2d_forward_output::call(self, weight, kernel_size, bias, stride, padding, output);
+  }
+}
+
+// aten::_slow_conv2d_forward(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding) -> Tensor
+inline at::Tensor _slow_conv2d_forward(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, at::IntArrayRef stride, at::IntArrayRef padding) {
+    return at::_ops::_slow_conv2d_forward::call(self, weight, c10::fromIntArrayRefSlow(kernel_size), bias, c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(padding));
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor _slow_conv2d_forward(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, at::IntArrayRef stride, at::IntArrayRef padding) {
+    return at::_ops::_slow_conv2d_forward::call(self, weight, c10::fromIntArrayRefSlow(kernel_size), bias, c10::fromIntArrayRefSlow(stride), c10::fromIntArrayRefSlow(padding));
+  }
+}
+
+// aten::_slow_conv2d_forward(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding) -> Tensor
+inline at::Tensor _slow_conv2d_forward_symint(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding) {
+    return at::_ops::_slow_conv2d_forward::call(self, weight, kernel_size, bias, stride, padding);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor _slow_conv2d_forward(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding) {
+    return at::_ops::_slow_conv2d_forward::call(self, weight, kernel_size, bias, stride, padding);
+  }
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_slow_conv2d_forward_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_slow_conv2d_forward_cpu_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..53bcdb831ec94c483d8ae5c3e9c678fb326a01eb
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_slow_conv2d_forward_cpu_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cpu {
+
+TORCH_API at::Tensor _slow_conv2d_forward(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, at::IntArrayRef stride, at::IntArrayRef padding);
+TORCH_API at::Tensor _slow_conv2d_forward_symint(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding);
+TORCH_API at::Tensor & _slow_conv2d_forward_out(at::Tensor & output, const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, at::IntArrayRef stride, at::IntArrayRef padding);
+TORCH_API at::Tensor & _slow_conv2d_forward_outf(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, at::IntArrayRef stride, at::IntArrayRef padding, at::Tensor & output);
+TORCH_API at::Tensor & _slow_conv2d_forward_symint_out(at::Tensor & output, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding);
+TORCH_API at::Tensor & _slow_conv2d_forward_symint_outf(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, at::Tensor & output);
+
+} // namespace cpu
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_slow_conv2d_forward_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_slow_conv2d_forward_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..38759275b57eda40e65c9f862c7e142c3c00de80
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_slow_conv2d_forward_cuda_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor _slow_conv2d_forward(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, at::IntArrayRef stride, at::IntArrayRef padding);
+TORCH_API at::Tensor _slow_conv2d_forward_symint(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding);
+TORCH_API at::Tensor & _slow_conv2d_forward_out(at::Tensor & output, const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, at::IntArrayRef stride, at::IntArrayRef padding);
+TORCH_API at::Tensor & _slow_conv2d_forward_outf(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, at::IntArrayRef stride, at::IntArrayRef padding, at::Tensor & output);
+TORCH_API at::Tensor & _slow_conv2d_forward_symint_out(at::Tensor & output, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding);
+TORCH_API at::Tensor & _slow_conv2d_forward_symint_outf(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, at::Tensor & output);
+
+} // namespace cuda
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_slow_conv2d_forward_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_slow_conv2d_forward_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..dcc845eb21f8bb6feca0e8a8be87cc7aca0c8308
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_slow_conv2d_forward_native.h
@@ -0,0 +1,24 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor slow_conv2d_forward_cpu(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, at::IntArrayRef stride, at::IntArrayRef padding);
+TORCH_API at::Tensor & slow_conv2d_forward_out_cpu(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, at::IntArrayRef stride, at::IntArrayRef padding, at::Tensor & output);
+TORCH_API at::Tensor slow_conv2d_forward_cuda(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, at::IntArrayRef stride, at::IntArrayRef padding);
+TORCH_API at::Tensor & slow_conv2d_forward_out_cuda(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, at::IntArrayRef stride, at::IntArrayRef padding, at::Tensor & output);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_slow_conv2d_forward_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_slow_conv2d_forward_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..503ea2576b3f537a4009b697ee6dc54f3bc3091c
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_slow_conv2d_forward_ops.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <array>
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _slow_conv2d_forward_output {
+  using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, c10::SymIntArrayRef, const ::std::optional<at::Tensor> &, c10::SymIntArrayRef, c10::SymIntArrayRef, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_slow_conv2d_forward";
+  static constexpr const char* overload_name = "output";
+  static constexpr const char* schema_str = "_slow_conv2d_forward.output(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, *, Tensor(a!) output) -> Tensor(a!)";
+  static at::Tensor & call(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, at::Tensor & output);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, at::Tensor & output);
+};
+
+struct TORCH_API _slow_conv2d_forward {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &, c10::SymIntArrayRef, const ::std::optional<at::Tensor> &, c10::SymIntArrayRef, c10::SymIntArrayRef);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_slow_conv2d_forward";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_slow_conv2d_forward(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding) -> Tensor";
+  static at::Tensor call(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional<at::Tensor> & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_draw.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_draw.h
new file mode 100644
index 0000000000000000000000000000000000000000..fd5ba58dd1713e3803a1cf6a34e11633b2584789
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_draw.h
@@ -0,0 +1,31 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <optional>
+
+
+
+#include <ATen/ops/_sobol_engine_draw_ops.h>
+
+namespace at {
+
+
+// aten::_sobol_engine_draw(Tensor quasi, int n, Tensor sobolstate, int dimension, int num_generated, ScalarType? dtype) -> (Tensor, Tensor)
+inline ::std::tuple<at::Tensor,at::Tensor> _sobol_engine_draw(const at::Tensor & quasi, int64_t n, const at::Tensor & sobolstate, int64_t dimension, int64_t num_generated, ::std::optional<at::ScalarType> dtype) {
+    return at::_ops::_sobol_engine_draw::call(quasi, n, sobolstate, dimension, num_generated, dtype);
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_draw_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_draw_compositeimplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..1684004be9198a24cd2ff94d5b4d1ed96cbb7c2f
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_draw_compositeimplicitautograd_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeimplicitautograd {
+
+TORCH_API ::std::tuple<at::Tensor,at::Tensor> _sobol_engine_draw(const at::Tensor & quasi, int64_t n, const at::Tensor & sobolstate, int64_t dimension, int64_t num_generated, ::std::optional<at::ScalarType> dtype);
+
+} // namespace compositeimplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_draw_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_draw_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..5e53cea82298d5a780aeaf0c0b8728a6b7eaaef6
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_draw_native.h
@@ -0,0 +1,21 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API ::std::tuple<at::Tensor,at::Tensor> _sobol_engine_draw(const at::Tensor & quasi, int64_t n, const at::Tensor & sobolstate, int64_t dimension, int64_t num_generated, ::std::optional<at::ScalarType> dtype);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_draw_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_draw_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..6614ccf5e751e62419e443ba4f5ff9fbb420a956
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_draw_ops.h
@@ -0,0 +1,29 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <array>
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _sobol_engine_draw {
+  using schema = ::std::tuple<at::Tensor,at::Tensor> (const at::Tensor &, int64_t, const at::Tensor &, int64_t, int64_t, ::std::optional<at::ScalarType>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_sobol_engine_draw";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_sobol_engine_draw(Tensor quasi, int n, Tensor sobolstate, int dimension, int num_generated, ScalarType? dtype) -> (Tensor, Tensor)";
+  static ::std::tuple<at::Tensor,at::Tensor> call(const at::Tensor & quasi, int64_t n, const at::Tensor & sobolstate, int64_t dimension, int64_t num_generated, ::std::optional<at::ScalarType> dtype);
+  static ::std::tuple<at::Tensor,at::Tensor> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & quasi, int64_t n, const at::Tensor & sobolstate, int64_t dimension, int64_t num_generated, ::std::optional<at::ScalarType> dtype);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_ff.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_ff.h
new file mode 100644
index 0000000000000000000000000000000000000000..c5fc60cc6e53c1f3fb66665980994d73f3b63088
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_ff.h
@@ -0,0 +1,31 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <optional>
+
+
+
+#include <ATen/ops/_sobol_engine_ff_ops.h>
+
+namespace at {
+
+
+// aten::_sobol_engine_ff_(Tensor(a!) self, int n, Tensor sobolstate, int dimension, int num_generated) -> Tensor(a!)
+inline at::Tensor & _sobol_engine_ff_(at::Tensor & self, int64_t n, const at::Tensor & sobolstate, int64_t dimension, int64_t num_generated) {
+    return at::_ops::_sobol_engine_ff_::call(self, n, sobolstate, dimension, num_generated);
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_ff_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_ff_compositeimplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..be216619fe8435a8051ea2e21023e39d94e57a9d
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_ff_compositeimplicitautograd_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeimplicitautograd {
+
+TORCH_API at::Tensor & _sobol_engine_ff_(at::Tensor & self, int64_t n, const at::Tensor & sobolstate, int64_t dimension, int64_t num_generated);
+
+} // namespace compositeimplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_ff_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_ff_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..bb9ada3623b4aa04e1719d1591ba77fdf4c07298
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_ff_native.h
@@ -0,0 +1,21 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor & _sobol_engine_ff_(at::Tensor & self, int64_t n, const at::Tensor & sobolstate, int64_t dimension, int64_t num_generated);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_ff_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_ff_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..e1fa6d729371992f91109b91559f8f25bc2de057
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_ff_ops.h
@@ -0,0 +1,29 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <array>
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _sobol_engine_ff_ {
+  using schema = at::Tensor & (at::Tensor &, int64_t, const at::Tensor &, int64_t, int64_t);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_sobol_engine_ff_";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_sobol_engine_ff_(Tensor(a!) self, int n, Tensor sobolstate, int dimension, int num_generated) -> Tensor(a!)";
+  static at::Tensor & call(at::Tensor & self, int64_t n, const at::Tensor & sobolstate, int64_t dimension, int64_t num_generated);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, int64_t n, const at::Tensor & sobolstate, int64_t dimension, int64_t num_generated);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_initialize_state.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_initialize_state.h
new file mode 100644
index 0000000000000000000000000000000000000000..60dd5cf45f549ff5717fff5cd76c56cfc07f25c4
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_initialize_state.h
@@ -0,0 +1,31 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <optional>
+
+
+
+#include <ATen/ops/_sobol_engine_initialize_state_ops.h>
+
+namespace at {
+
+
+// aten::_sobol_engine_initialize_state_(Tensor(a!) self, int dimension) -> Tensor(a!)
+inline at::Tensor & _sobol_engine_initialize_state_(at::Tensor & self, int64_t dimension) {
+    return at::_ops::_sobol_engine_initialize_state_::call(self, dimension);
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_initialize_state_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_initialize_state_compositeimplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..a3eadfeb6637014afca7a26024f9bc41c6c8636a
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_initialize_state_compositeimplicitautograd_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeimplicitautograd {
+
+TORCH_API at::Tensor & _sobol_engine_initialize_state_(at::Tensor & self, int64_t dimension);
+
+} // namespace compositeimplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_initialize_state_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_initialize_state_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..7485130be668e7c27112a3dd17d3e9371ca6a49b
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_initialize_state_native.h
@@ -0,0 +1,21 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor & _sobol_engine_initialize_state_(at::Tensor & self, int64_t dimension);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_initialize_state_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_initialize_state_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..ea2a5ba83b343e34687a27398eb9092a9492ff48
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_initialize_state_ops.h
@@ -0,0 +1,29 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <array>
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _sobol_engine_initialize_state_ {
+  using schema = at::Tensor & (at::Tensor &, int64_t);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_sobol_engine_initialize_state_";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_sobol_engine_initialize_state_(Tensor(a!) self, int dimension) -> Tensor(a!)";
+  static at::Tensor & call(at::Tensor & self, int64_t dimension);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, int64_t dimension);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_scramble.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_scramble.h
new file mode 100644
index 0000000000000000000000000000000000000000..99ff06fa96f8bb00db65008a278983c0a7c896b6
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_scramble.h
@@ -0,0 +1,31 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <optional>
+
+
+
+#include <ATen/ops/_sobol_engine_scramble_ops.h>
+
+namespace at {
+
+
+// aten::_sobol_engine_scramble_(Tensor(a!) self, Tensor ltm, int dimension) -> Tensor(a!)
+inline at::Tensor & _sobol_engine_scramble_(at::Tensor & self, const at::Tensor & ltm, int64_t dimension) {
+    return at::_ops::_sobol_engine_scramble_::call(self, ltm, dimension);
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_scramble_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_scramble_compositeimplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..e38c9700dfb6b2a06914d371ef5e4ebf8f5675a0
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_scramble_compositeimplicitautograd_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeimplicitautograd {
+
+TORCH_API at::Tensor & _sobol_engine_scramble_(at::Tensor & self, const at::Tensor & ltm, int64_t dimension);
+
+} // namespace compositeimplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_scramble_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_scramble_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..d459d0cbcd93e447ab99e2d5f1821f54eccf76c1
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_scramble_native.h
@@ -0,0 +1,21 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor & _sobol_engine_scramble_(at::Tensor & self, const at::Tensor & ltm, int64_t dimension);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_scramble_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_scramble_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..db90df8009d4eb0f4cce36ebf1d558235f0b0b79
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sobol_engine_scramble_ops.h
@@ -0,0 +1,29 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <array>
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _sobol_engine_scramble_ {
+  using schema = at::Tensor & (at::Tensor &, const at::Tensor &, int64_t);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_sobol_engine_scramble_";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_sobol_engine_scramble_(Tensor(a!) self, Tensor ltm, int dimension) -> Tensor(a!)";
+  static at::Tensor & call(at::Tensor & self, const at::Tensor & ltm, int64_t dimension);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, const at::Tensor & ltm, int64_t dimension);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax.h
new file mode 100644
index 0000000000000000000000000000000000000000..e7ea170a7a83e8055273d0701e11f51be8fd577e
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <optional>
+
+
+
+#include <ATen/ops/_softmax_ops.h>
+
+namespace at {
+
+
+// aten::_softmax(Tensor self, int dim, bool half_to_float) -> Tensor
+inline at::Tensor _softmax(const at::Tensor & self, int64_t dim, bool half_to_float) {
+    return at::_ops::_softmax::call(self, dim, half_to_float);
+}
+
+// aten::_softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _softmax_out(at::Tensor & out, const at::Tensor & self, int64_t dim, bool half_to_float) {
+    return at::_ops::_softmax_out::call(self, dim, half_to_float, out);
+}
+// aten::_softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _softmax_outf(const at::Tensor & self, int64_t dim, bool half_to_float, at::Tensor & out) {
+    return at::_ops::_softmax_out::call(self, dim, half_to_float, out);
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data.h
new file mode 100644
index 0000000000000000000000000000000000000000..c31d196aed2902151ddc22465bfc2943b1b4d12d
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <optional>
+
+
+
+#include <ATen/ops/_softmax_backward_data_ops.h>
+
+namespace at {
+
+
+// aten::_softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
+inline at::Tensor _softmax_backward_data(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype) {
+    return at::_ops::_softmax_backward_data::call(grad_output, output, dim, input_dtype);
+}
+
+// aten::_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & _softmax_backward_data_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype) {
+    return at::_ops::_softmax_backward_data_out::call(grad_output, output, dim, input_dtype, grad_input);
+}
+// aten::_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & _softmax_backward_data_outf(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype, at::Tensor & grad_input) {
+    return at::_ops::_softmax_backward_data_out::call(grad_output, output, dim, input_dtype, grad_input);
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data_compositeexplicitautogradnonfunctional_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data_compositeexplicitautogradnonfunctional_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..c7483d82bba360a7ff9bf6e04491efc1d27aaf54
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data_compositeexplicitautogradnonfunctional_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautogradnonfunctional {
+
+TORCH_API at::Tensor _softmax_backward_data(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype);
+
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data_cpu_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..11b8c92baf10851b0da0b2e36b82f3ac57a3584c
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data_cpu_dispatch.h
@@ -0,0 +1,25 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cpu {
+
+TORCH_API at::Tensor _softmax_backward_data(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype);
+TORCH_API at::Tensor & _softmax_backward_data_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype);
+TORCH_API at::Tensor & _softmax_backward_data_outf(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype, at::Tensor & grad_input);
+
+} // namespace cpu
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..0dea6f935814fbd7954b7d96a2c28fc405e339f7
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data_cuda_dispatch.h
@@ -0,0 +1,25 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor _softmax_backward_data(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype);
+TORCH_API at::Tensor & _softmax_backward_data_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype);
+TORCH_API at::Tensor & _softmax_backward_data_outf(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype, at::Tensor & grad_input);
+
+} // namespace cuda
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data_meta.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data_meta.h
new file mode 100644
index 0000000000000000000000000000000000000000..c5c144f030b1e8ecbbcf8cde625d004a057a0e02
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data_meta.h
@@ -0,0 +1,27 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeMetaFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/TensorIterator.h>
+#include <ATen/TensorMeta.h>
+#include <tuple>
+#include <vector>
+
+namespace at {
+namespace meta {
+
+struct TORCH_API structured__softmax_backward_data : public at::impl::MetaBase {
+
+
+  void meta(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype);
+};
+
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data_meta_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data_meta_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..f83a0d1b947f1f0a1d06db442057788996f571c3
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data_meta_dispatch.h
@@ -0,0 +1,25 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace meta {
+
+TORCH_API at::Tensor _softmax_backward_data(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype);
+TORCH_API at::Tensor & _softmax_backward_data_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype);
+TORCH_API at::Tensor & _softmax_backward_data_outf(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype, at::Tensor & grad_input);
+
+} // namespace meta
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..e2003ded040f5226ffc5b5f2f4908548578f8886
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data_native.h
@@ -0,0 +1,27 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+#include <ATen/ops/_softmax_backward_data_meta.h>
+
+namespace at {
+namespace native {
+struct TORCH_API structured_softmax_backward_cpu_out : public at::meta::structured__softmax_backward_data {
+void impl(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype, const at::Tensor & grad_input);
+};
+struct TORCH_API structured_softmax_backward_cuda_out : public at::meta::structured__softmax_backward_data {
+void impl(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype, const at::Tensor & grad_input);
+};
+TORCH_API at::Tensor nested_softmax_backward(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..8bdfe1120aa25d8f7355e22fd5c2470e95bf3cc2
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_backward_data_ops.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <array>
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _softmax_backward_data {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &, int64_t, at::ScalarType);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_softmax_backward_data";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor";
+  static at::Tensor call(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype);
+};
+
+struct TORCH_API _softmax_backward_data_out {
+  using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, int64_t, at::ScalarType, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_softmax_backward_data";
+  static constexpr const char* overload_name = "out";
+  static constexpr const char* schema_str = "_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype, *, Tensor(a!) grad_input) -> Tensor(a!)";
+  static at::Tensor & call(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype, at::Tensor & grad_input);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype, at::Tensor & grad_input);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_compositeexplicitautogradnonfunctional_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_compositeexplicitautogradnonfunctional_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..d07efd92e1e1ba4581a4f643d9b0eb6f79eaff57
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_compositeexplicitautogradnonfunctional_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautogradnonfunctional {
+
+TORCH_API at::Tensor _softmax(const at::Tensor & self, int64_t dim, bool half_to_float);
+
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_cpu_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..036868128292c0043565a931ac97e5501f7827db
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_cpu_dispatch.h
@@ -0,0 +1,25 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cpu {
+
+TORCH_API at::Tensor _softmax(const at::Tensor & self, int64_t dim, bool half_to_float);
+TORCH_API at::Tensor & _softmax_out(at::Tensor & out, const at::Tensor & self, int64_t dim, bool half_to_float);
+TORCH_API at::Tensor & _softmax_outf(const at::Tensor & self, int64_t dim, bool half_to_float, at::Tensor & out);
+
+} // namespace cpu
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..bafe40725145ed56b85d734154371abf13532ea2
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_cuda_dispatch.h
@@ -0,0 +1,25 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor _softmax(const at::Tensor & self, int64_t dim, bool half_to_float);
+TORCH_API at::Tensor & _softmax_out(at::Tensor & out, const at::Tensor & self, int64_t dim, bool half_to_float);
+TORCH_API at::Tensor & _softmax_outf(const at::Tensor & self, int64_t dim, bool half_to_float, at::Tensor & out);
+
+} // namespace cuda
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_meta.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_meta.h
new file mode 100644
index 0000000000000000000000000000000000000000..0066c3ec86e7c7c5ffd298f59adafde901a7dd0b
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_meta.h
@@ -0,0 +1,27 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeMetaFunction.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace at {
+namespace meta {
+
+struct TORCH_API structured__softmax : public at::impl::MetaBase {
+
+
+  void meta(const at::Tensor & self, int64_t dim, bool half_to_float);
+};
+
+} // namespace meta
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_meta_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_meta_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..50a68f69b8db67bdc144f78e24682d7b99728911
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_meta_dispatch.h
@@ -0,0 +1,25 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
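+// Usage sketch (hand-written, not generated): structured__softmax::meta()
+// computes only output metadata, which is also what kernels registered for
+// the Meta key run; that allows shape inference without real data. A minimal
+// sketch, assuming only core ATen:
+//
+//   at::Tensor m = at::empty({8, 128}, at::TensorOptions().device(at::kMeta));
+//   at::Tensor r = at::meta::_softmax(m, /*dim=*/1, /*half_to_float=*/false);
+//   // r.sizes() == {8, 128}, but r holds no actual data.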
+#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor _softmax(const at::Tensor & self, int64_t dim, bool half_to_float); +TORCH_API at::Tensor & _softmax_out(at::Tensor & out, const at::Tensor & self, int64_t dim, bool half_to_float); +TORCH_API at::Tensor & _softmax_outf(const at::Tensor & self, int64_t dim, bool half_to_float, at::Tensor & out); + +} // namespace meta +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_native.h new file mode 100644 index 0000000000000000000000000000000000000000..95cf2466fc059c0793691727552563b0c0d5f23c --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_native.h @@ -0,0 +1,28 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_softmax_cpu_out : public at::meta::structured__softmax { +void impl(const at::Tensor & self, int64_t dim, bool half_to_float, const at::Tensor & out); +}; +struct TORCH_API structured_softmax_cuda_out : public at::meta::structured__softmax { +void impl(const at::Tensor & self, int64_t dim, bool half_to_float, const at::Tensor & out); +}; +TORCH_API at::Tensor softmax_nested(const at::Tensor & self, int64_t dim, bool half_to_float); +TORCH_API at::Tensor mkldnn_softmax(const at::Tensor & self, int64_t dim, bool half_to_float); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..e9727f6503b2c41d03c532745d9c68c53f3798b1 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_softmax_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _softmax { + using schema = at::Tensor (const at::Tensor &, int64_t, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_softmax"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_softmax(Tensor self, int dim, bool half_to_float) -> Tensor"; + static at::Tensor call(const at::Tensor & self, int64_t dim, bool half_to_float); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t dim, bool half_to_float); +}; + +struct TORCH_API _softmax_out { + using schema = at::Tensor & (const at::Tensor &, int64_t, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_softmax"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, int64_t dim, bool half_to_float, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t dim, bool half_to_float, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_addmm.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_addmm.h new file mode 100644 index 0000000000000000000000000000000000000000..ac801f917d8263699e491145a855ca88b103a4c3 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_addmm.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor +inline at::Tensor _sparse_addmm(const at::Tensor & self, const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta=1, const at::Scalar & alpha=1) { + return at::_ops::_sparse_addmm::call(self, mat1, mat2, beta, alpha); +} + +// aten::_sparse_addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _sparse_addmm_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta=1, const at::Scalar & alpha=1) { + return at::_ops::_sparse_addmm_out::call(self, mat1, mat2, beta, alpha, out); +} +// aten::_sparse_addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _sparse_addmm_outf(const at::Tensor & self, const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta, const at::Scalar & alpha, at::Tensor & out) { + return at::_ops::_sparse_addmm_out::call(self, mat1, mat2, beta, alpha, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_addmm_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_addmm_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..cbfdf3acb1165d22ee1f722667d588bbad5a5c03 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_addmm_compositeexplicitautograd_dispatch.h @@ -0,0 +1,25 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor _sparse_addmm(const at::Tensor & self, const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta=1, const at::Scalar & alpha=1); +TORCH_API at::Tensor & _sparse_addmm_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta=1, const at::Scalar & alpha=1); +TORCH_API at::Tensor & _sparse_addmm_outf(const at::Tensor & self, const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta, const at::Scalar & alpha, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_addmm_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_addmm_native.h new file mode 100644 index 0000000000000000000000000000000000000000..03e24364a18843a7bb295ace0e209b55f9f5add4 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_addmm_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _sparse_addmm(const at::Tensor & self, const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta=1, const at::Scalar & alpha=1); +TORCH_API at::Tensor & _sparse_addmm_out(const at::Tensor & self, const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta, const at::Scalar & alpha, at::Tensor & out); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_addmm_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_addmm_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..b08cb9380514b4ef352b7bb52c562b517a05283b --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_addmm_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_addmm { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Scalar &, const at::Scalar &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_addmm"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta, const at::Scalar & alpha); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta, const at::Scalar & alpha); +}; + +struct TORCH_API _sparse_addmm_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Scalar &, const at::Scalar &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_addmm"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_sparse_addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta, const at::Scalar & alpha, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta, const at::Scalar & alpha, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to.h new file mode 100644 index 0000000000000000000000000000000000000000..ee33514338742762bb6a4d6d6c6afc9092c5c569 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_broadcast_to(Tensor(a) self, int[] size) -> Tensor(a) +inline at::Tensor _sparse_broadcast_to(const at::Tensor & self, at::IntArrayRef size) { + return at::_ops::_sparse_broadcast_to::call(self, size); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_copy.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_copy.h new file mode 100644 index 0000000000000000000000000000000000000000..3606b51632a49ea5ee638327050050d1fc54912a --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_copy.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_broadcast_to_copy(Tensor self, int[] size) -> Tensor +inline at::Tensor _sparse_broadcast_to_copy(const at::Tensor & self, at::IntArrayRef size) { + return 
at::_ops::_sparse_broadcast_to_copy::call(self, size); +} + +// aten::_sparse_broadcast_to_copy.out(Tensor self, int[] size, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _sparse_broadcast_to_copy_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef size) { + return at::_ops::_sparse_broadcast_to_copy_out::call(self, size, out); +} +// aten::_sparse_broadcast_to_copy.out(Tensor self, int[] size, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _sparse_broadcast_to_copy_outf(const at::Tensor & self, at::IntArrayRef size, at::Tensor & out) { + return at::_ops::_sparse_broadcast_to_copy_out::call(self, size, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_copy_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_copy_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..03111bb9f4991608e6ac03632a202d6cc3b277ee --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_copy_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _sparse_broadcast_to_copy_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef size); +TORCH_API at::Tensor & _sparse_broadcast_to_copy_outf(const at::Tensor & self, at::IntArrayRef size, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_copy_compositeexplicitautogradnonfunctional_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_copy_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..0380fbb9e3e8c664c77def05c7d4181175922da9 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_copy_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor _sparse_broadcast_to_copy(const at::Tensor & self, at::IntArrayRef size); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_copy_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_copy_native.h new file mode 100644 index 0000000000000000000000000000000000000000..b5a7dacb925466f3ed72bdbd614087f657a991ea --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_copy_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _sparse_broadcast_to_copy_out(const at::Tensor & self, at::IntArrayRef size, at::Tensor & out); +TORCH_API at::Tensor _sparse_broadcast_to_copy(const at::Tensor & self, at::IntArrayRef size); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_copy_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_copy_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..be630680a6f9d6336d0aa9933b2f60bb74707ae7 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_copy_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_broadcast_to_copy { + using schema = at::Tensor (const at::Tensor &, at::IntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_broadcast_to_copy"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_broadcast_to_copy(Tensor self, int[] size) -> Tensor"; + static at::Tensor call(const at::Tensor & self, at::IntArrayRef size); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef size); +}; + +struct TORCH_API _sparse_broadcast_to_copy_out { + using schema = at::Tensor & (const at::Tensor &, at::IntArrayRef, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_broadcast_to_copy"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_sparse_broadcast_to_copy.out(Tensor self, int[] size, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::IntArrayRef size, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef size, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_native.h new file mode 100644 index 0000000000000000000000000000000000000000..7e2ea4978a6b3a724b0098de57a063ea44437ba5 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor sparse_broadcast_to(const at::Tensor & self, at::IntArrayRef size); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..aa17d81012f17da6be0cca7fcb3f38078bf52174 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_broadcast_to_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_broadcast_to { + using schema = at::Tensor (const at::Tensor &, at::IntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_broadcast_to"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_broadcast_to(Tensor(a) self, int[] size) -> Tensor(a)"; + static at::Tensor call(const at::Tensor & self, at::IntArrayRef size); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef size); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsc_tensor_unsafe.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsc_tensor_unsafe.h new file mode 100644 index 0000000000000000000000000000000000000000..f4c16324f96c39186c2858c4c30787cba8df273b --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsc_tensor_unsafe.h @@ -0,0 +1,35 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_bsc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor +inline at::Tensor _sparse_bsc_tensor_unsafe(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, at::TensorOptions options={}) { + return at::_ops::_sparse_bsc_tensor_unsafe::call(ccol_indices, row_indices, values, size, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +// aten::_sparse_bsc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor _sparse_bsc_tensor_unsafe(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::_sparse_bsc_tensor_unsafe::call(ccol_indices, row_indices, values, size, dtype, layout, device, pin_memory); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsc_tensor_unsafe_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsc_tensor_unsafe_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..c83e594ef55aec417b225185e256857ef1241d15 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsc_tensor_unsafe_compositeimplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
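+// Usage sketch (hand-written, not generated): every factory in these headers
+// comes as a pair -- a TensorOptions convenience overload and the explicit
+// (dtype, layout, device, pin_memory) overload matching the schema; the
+// former just unpacks its options and forwards, as the bodies above show.
+// Assuming well-formed BSC components ccol, row, vals and a matching size:
+//
+//   at::Tensor a = at::_sparse_bsc_tensor_unsafe(ccol, row, vals, size,
+//       at::TensorOptions().dtype(at::kFloat));
+//   at::Tensor b = at::_sparse_bsc_tensor_unsafe(ccol, row, vals, size,
+//       at::kFloat, ::std::nullopt, ::std::nullopt, ::std::nullopt);
+//   // a and b reach the same at::_ops::_sparse_bsc_tensor_unsafe::call.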
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _sparse_bsc_tensor_unsafe(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, at::TensorOptions options={}); +TORCH_API at::Tensor _sparse_bsc_tensor_unsafe(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsc_tensor_unsafe_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsc_tensor_unsafe_native.h new file mode 100644 index 0000000000000000000000000000000000000000..6ffa3840d5424b96d19b7de01e86dac131ca1122 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsc_tensor_unsafe_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _sparse_bsc_tensor_unsafe(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsc_tensor_unsafe_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsc_tensor_unsafe_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..9a22f586734e815613bc22fe8718c509886d534c --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsc_tensor_unsafe_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_bsc_tensor_unsafe { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, at::IntArrayRef, ::std::optional, ::std::optional, ::std::optional, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_bsc_tensor_unsafe"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_bsc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor"; + static at::Tensor call(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsr_tensor_unsafe.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsr_tensor_unsafe.h new file mode 100644 index 0000000000000000000000000000000000000000..e3aaad52324e9e9478090b5cb14ec23bf675b053 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsr_tensor_unsafe.h @@ -0,0 +1,35 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_bsr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor _sparse_bsr_tensor_unsafe(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, at::TensorOptions options={}) { + return at::_ops::_sparse_bsr_tensor_unsafe::call(crow_indices, col_indices, values, size, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +// aten::_sparse_bsr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor _sparse_bsr_tensor_unsafe(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::_sparse_bsr_tensor_unsafe::call(crow_indices, col_indices, values, size, dtype, layout, device, pin_memory); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsr_tensor_unsafe_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsr_tensor_unsafe_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..550f4e3c122978143b4ef91fa6aebd85248d0a18 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsr_tensor_unsafe_compositeimplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _sparse_bsr_tensor_unsafe(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, at::TensorOptions options={}); +TORCH_API at::Tensor _sparse_bsr_tensor_unsafe(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsr_tensor_unsafe_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsr_tensor_unsafe_native.h new file mode 100644 index 0000000000000000000000000000000000000000..81e0552656025dc2b32de5661bd5347fe7cf4859 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsr_tensor_unsafe_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _sparse_bsr_tensor_unsafe(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsr_tensor_unsafe_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsr_tensor_unsafe_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..ab1f977fd728de745816739245b4f737b8dcc79a --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_bsr_tensor_unsafe_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_bsr_tensor_unsafe { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, at::IntArrayRef, ::std::optional, ::std::optional, ::std::optional, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_bsr_tensor_unsafe"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_bsr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor"; + static at::Tensor call(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_unsafe.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_unsafe.h new file mode 100644 index 0000000000000000000000000000000000000000..0875ca9a7102bae2a6a00d6075efce7607258db5 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_unsafe.h @@ -0,0 +1,70 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor _sparse_compressed_tensor_unsafe(const at::Tensor & compressed_indices, const at::Tensor & plain_indices, const at::Tensor & values, at::IntArrayRef size, at::TensorOptions options={}) { + return at::_ops::_sparse_compressed_tensor_unsafe::call(compressed_indices, plain_indices, values, c10::fromIntArrayRefSlow(size), c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +namespace symint { + template >> + at::Tensor _sparse_compressed_tensor_unsafe(const at::Tensor & compressed_indices, const at::Tensor & plain_indices, const at::Tensor & values, at::IntArrayRef size, at::TensorOptions options={}) { + return at::_ops::_sparse_compressed_tensor_unsafe::call(compressed_indices, plain_indices, values, c10::fromIntArrayRefSlow(size), c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); + } +} + +// aten::_sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor +inline at::Tensor _sparse_compressed_tensor_unsafe(const at::Tensor & compressed_indices, const at::Tensor & plain_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::_sparse_compressed_tensor_unsafe::call(compressed_indices, plain_indices, values, c10::fromIntArrayRefSlow(size), dtype, layout, device, pin_memory); +} +namespace symint { + template >> + at::Tensor _sparse_compressed_tensor_unsafe(const at::Tensor & compressed_indices, const at::Tensor & plain_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::_sparse_compressed_tensor_unsafe::call(compressed_indices, plain_indices, values, c10::fromIntArrayRefSlow(size), dtype, layout, device, pin_memory); + } +} + +// aten::_sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor _sparse_compressed_tensor_unsafe_symint(const at::Tensor & compressed_indices, const at::Tensor & plain_indices, const at::Tensor & values, c10::SymIntArrayRef size, at::TensorOptions options={}) { + return at::_ops::_sparse_compressed_tensor_unsafe::call(compressed_indices, plain_indices, values, size, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +namespace symint { + template >> + at::Tensor _sparse_compressed_tensor_unsafe(const at::Tensor & compressed_indices, const at::Tensor & plain_indices, const at::Tensor & values, c10::SymIntArrayRef size, at::TensorOptions options={}) { + return at::_ops::_sparse_compressed_tensor_unsafe::call(compressed_indices, plain_indices, values, size, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); + } +} + +// aten::_sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor +inline at::Tensor _sparse_compressed_tensor_unsafe_symint(const at::Tensor & compressed_indices, const at::Tensor & plain_indices, const at::Tensor & values, c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::_sparse_compressed_tensor_unsafe::call(compressed_indices, plain_indices, values, size, dtype, layout, device, pin_memory); +} +namespace symint { + template >> + at::Tensor _sparse_compressed_tensor_unsafe(const at::Tensor & compressed_indices, const at::Tensor & plain_indices, const at::Tensor & values, c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::_sparse_compressed_tensor_unsafe::call(compressed_indices, plain_indices, values, size, dtype, layout, device, pin_memory); + } +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_unsafe_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_unsafe_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3d86ab85cf6cac818f927d5c03baf0ebb4cfba09 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_unsafe_compositeimplicitautograd_dispatch.h @@ -0,0 +1,26 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
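+// Usage sketch (hand-written, not generated): the _symint variants accept
+// c10::SymIntArrayRef so traced programs can pass symbolic sizes, while the
+// plain overloads widen their IntArrayRef via c10::fromIntArrayRefSlow and
+// forward to the same SymInt schema, as the bodies above show. Assuming
+// components comp, plain, vals and options opts:
+//
+//   std::vector<int64_t> size = {6, 6};
+//   at::Tensor t = at::_sparse_compressed_tensor_unsafe(comp, plain, vals, size, opts);
+//   std::vector<c10::SymInt> ssize = {c10::SymInt(6), c10::SymInt(6)};
+//   at::Tensor u = at::_sparse_compressed_tensor_unsafe_symint(comp, plain, vals, ssize, opts);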
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _sparse_compressed_tensor_unsafe(const at::Tensor & compressed_indices, const at::Tensor & plain_indices, const at::Tensor & values, at::IntArrayRef size, at::TensorOptions options={}); +TORCH_API at::Tensor _sparse_compressed_tensor_unsafe(const at::Tensor & compressed_indices, const at::Tensor & plain_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +TORCH_API at::Tensor _sparse_compressed_tensor_unsafe_symint(const at::Tensor & compressed_indices, const at::Tensor & plain_indices, const at::Tensor & values, c10::SymIntArrayRef size, at::TensorOptions options={}); +TORCH_API at::Tensor _sparse_compressed_tensor_unsafe_symint(const at::Tensor & compressed_indices, const at::Tensor & plain_indices, const at::Tensor & values, c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_unsafe_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_unsafe_native.h new file mode 100644 index 0000000000000000000000000000000000000000..08819063cf4ab8a66ed4e2d1540a72c7e67fa3d9 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_unsafe_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _sparse_compressed_tensor_unsafe_symint(const at::Tensor & compressed_indices, const at::Tensor & plain_indices, const at::Tensor & values, c10::SymIntArrayRef size, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_unsafe_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_unsafe_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..19cf9c1ed2fefa5460057687fa0912d6f8ea6cff --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_unsafe_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_compressed_tensor_unsafe { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, c10::SymIntArrayRef, ::std::optional, ::std::optional, ::std::optional, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_compressed_tensor_unsafe"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor"; + static at::Tensor call(const at::Tensor & compressed_indices, const at::Tensor & plain_indices, const at::Tensor & values, c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & compressed_indices, const at::Tensor & plain_indices, const at::Tensor & values, c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_with_dims.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_with_dims.h new file mode 100644 index 0000000000000000000000000000000000000000..965e66b4efbe74563514d497bef2b60d6746d410 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_with_dims.h @@ -0,0 +1,35 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_compressed_tensor_with_dims(int nnz, int dense_dim, int[] size, int[] blocksize, ScalarType index_dtype, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor +inline at::Tensor _sparse_compressed_tensor_with_dims(int64_t nnz, int64_t dense_dim, at::IntArrayRef size, at::IntArrayRef blocksize, at::ScalarType index_dtype, at::TensorOptions options) { + return at::_ops::_sparse_compressed_tensor_with_dims::call(nnz, dense_dim, size, blocksize, index_dtype, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +// aten::_sparse_compressed_tensor_with_dims(int nnz, int dense_dim, int[] size, int[] blocksize, ScalarType index_dtype, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=False) -> Tensor +inline at::Tensor _sparse_compressed_tensor_with_dims(int64_t nnz, int64_t dense_dim, at::IntArrayRef size, at::IntArrayRef blocksize, at::ScalarType index_dtype, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::_sparse_compressed_tensor_with_dims::call(nnz, dense_dim, size, blocksize, index_dtype, dtype, layout, device, pin_memory); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_with_dims_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_with_dims_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..c29d1c4705ce4f627b5a5102771e6c9100bcf7dc --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_with_dims_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor _sparse_compressed_tensor_with_dims(int64_t nnz, int64_t dense_dim, at::IntArrayRef size, at::IntArrayRef blocksize, at::ScalarType index_dtype, at::TensorOptions options); +TORCH_API at::Tensor _sparse_compressed_tensor_with_dims(int64_t nnz, int64_t dense_dim, at::IntArrayRef size, at::IntArrayRef blocksize, at::ScalarType index_dtype, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_with_dims_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_with_dims_native.h new file mode 100644 index 0000000000000000000000000000000000000000..8bed2ca0771bf6943184d24233ce9ee732b98620 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_with_dims_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor sparse_compressed_tensor_with_dims(int64_t nnz, int64_t dense_dim, at::IntArrayRef size, at::IntArrayRef blocksize, at::ScalarType index_dtype, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_with_dims_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_with_dims_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..7262704c2d0afb67b713b13f7c2a3f07657133be --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_with_dims_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + 
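+// Usage sketch (hand-written, not generated): unlike the *_unsafe factories,
+// _sparse_compressed_tensor_with_dims takes a non-defaulted TensorOptions,
+// since the layout has to pick one of the compressed formats; note its schema
+// also defaults pin_memory to False rather than None. A hedged sketch:
+//
+//   at::Tensor t = at::_sparse_compressed_tensor_with_dims(
+//       /*nnz=*/0, /*dense_dim=*/0, /*size=*/{4, 4}, /*blocksize=*/{},
+//       /*index_dtype=*/at::kInt, at::TensorOptions().layout(at::kSparseCsr));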
+#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_compressed_tensor_with_dims { + using schema = at::Tensor (int64_t, int64_t, at::IntArrayRef, at::IntArrayRef, at::ScalarType, ::std::optional, ::std::optional, ::std::optional, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_compressed_tensor_with_dims"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_compressed_tensor_with_dims(int nnz, int dense_dim, int[] size, int[] blocksize, ScalarType index_dtype, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor"; + static at::Tensor call(int64_t nnz, int64_t dense_dim, at::IntArrayRef size, at::IntArrayRef blocksize, at::ScalarType index_dtype, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, int64_t nnz, int64_t dense_dim, at::IntArrayRef size, at::IntArrayRef blocksize, at::ScalarType index_dtype, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_unsafe.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_unsafe.h new file mode 100644 index 0000000000000000000000000000000000000000..8dcd14bbdd9399127758e668128cd1f1ab9172ad --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_unsafe.h @@ -0,0 +1,70 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_coo_tensor_unsafe(Tensor indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool? is_coalesced=None) -> Tensor +inline at::Tensor _sparse_coo_tensor_unsafe(const at::Tensor & indices, const at::Tensor & values, at::IntArrayRef size, at::TensorOptions options={}, ::std::optional is_coalesced=::std::nullopt) { + return at::_ops::_sparse_coo_tensor_unsafe::call(indices, values, c10::fromIntArrayRefSlow(size), c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), is_coalesced); +} +namespace symint { + template >> + at::Tensor _sparse_coo_tensor_unsafe(const at::Tensor & indices, const at::Tensor & values, at::IntArrayRef size, at::TensorOptions options={}, ::std::optional is_coalesced=::std::nullopt) { + return at::_ops::_sparse_coo_tensor_unsafe::call(indices, values, c10::fromIntArrayRefSlow(size), c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), is_coalesced); + } +} + +// aten::_sparse_coo_tensor_unsafe(Tensor indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool? 
is_coalesced=None) -> Tensor +inline at::Tensor _sparse_coo_tensor_unsafe(const at::Tensor & indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional is_coalesced) { + return at::_ops::_sparse_coo_tensor_unsafe::call(indices, values, c10::fromIntArrayRefSlow(size), dtype, layout, device, pin_memory, is_coalesced); +} +namespace symint { + template >> + at::Tensor _sparse_coo_tensor_unsafe(const at::Tensor & indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional is_coalesced) { + return at::_ops::_sparse_coo_tensor_unsafe::call(indices, values, c10::fromIntArrayRefSlow(size), dtype, layout, device, pin_memory, is_coalesced); + } +} + +// aten::_sparse_coo_tensor_unsafe(Tensor indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool? is_coalesced=None) -> Tensor +inline at::Tensor _sparse_coo_tensor_unsafe_symint(const at::Tensor & indices, const at::Tensor & values, c10::SymIntArrayRef size, at::TensorOptions options={}, ::std::optional is_coalesced=::std::nullopt) { + return at::_ops::_sparse_coo_tensor_unsafe::call(indices, values, size, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), is_coalesced); +} +namespace symint { + template >> + at::Tensor _sparse_coo_tensor_unsafe(const at::Tensor & indices, const at::Tensor & values, c10::SymIntArrayRef size, at::TensorOptions options={}, ::std::optional is_coalesced=::std::nullopt) { + return at::_ops::_sparse_coo_tensor_unsafe::call(indices, values, size, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), is_coalesced); + } +} + +// aten::_sparse_coo_tensor_unsafe(Tensor indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool? 
+inline at::Tensor _sparse_coo_tensor_unsafe_symint(const at::Tensor & indices, const at::Tensor & values, c10::SymIntArrayRef size, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory, ::std::optional<bool> is_coalesced) {
+    return at::_ops::_sparse_coo_tensor_unsafe::call(indices, values, size, dtype, layout, device, pin_memory, is_coalesced);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor _sparse_coo_tensor_unsafe(const at::Tensor & indices, const at::Tensor & values, c10::SymIntArrayRef size, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory, ::std::optional<bool> is_coalesced) {
+    return at::_ops::_sparse_coo_tensor_unsafe::call(indices, values, size, dtype, layout, device, pin_memory, is_coalesced);
+  }
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_unsafe_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_unsafe_compositeimplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..940c6f3c42dbf424988eb02e0ec0042aa53623bc
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_unsafe_compositeimplicitautograd_dispatch.h
@@ -0,0 +1,26 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeimplicitautograd {
+
+TORCH_API at::Tensor _sparse_coo_tensor_unsafe(const at::Tensor & indices, const at::Tensor & values, at::IntArrayRef size, at::TensorOptions options={}, ::std::optional<bool> is_coalesced=::std::nullopt);
+TORCH_API at::Tensor _sparse_coo_tensor_unsafe(const at::Tensor & indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory, ::std::optional<bool> is_coalesced);
+TORCH_API at::Tensor _sparse_coo_tensor_unsafe_symint(const at::Tensor & indices, const at::Tensor & values, c10::SymIntArrayRef size, at::TensorOptions options={}, ::std::optional<bool> is_coalesced=::std::nullopt);
+TORCH_API at::Tensor _sparse_coo_tensor_unsafe_symint(const at::Tensor & indices, const at::Tensor & values, c10::SymIntArrayRef size, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory, ::std::optional<bool> is_coalesced);
+
+} // namespace compositeimplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_unsafe_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_unsafe_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..08215547a4b614805fef557c8b30863826783f03
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_unsafe_native.h
@@ -0,0 +1,21 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <optional>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor _sparse_coo_tensor_unsafe_symint(const at::Tensor & indices, const at::Tensor & values, c10::SymIntArrayRef size, ::std::optional<at::ScalarType> dtype={}, ::std::optional<at::Layout> layout={}, ::std::optional<at::Device> device={}, ::std::optional<bool> pin_memory={}, ::std::optional<bool> is_coalesced=::std::nullopt);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_unsafe_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_unsafe_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..3053687cc00f5c04d6f79f33f48439f7cd6e02e7
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_unsafe_ops.h
@@ -0,0 +1,29 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+#include <optional>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _sparse_coo_tensor_unsafe {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &, c10::SymIntArrayRef, ::std::optional<at::ScalarType>, ::std::optional<at::Layout>, ::std::optional<at::Device>, ::std::optional<bool>, ::std::optional<bool>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_sparse_coo_tensor_unsafe";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_sparse_coo_tensor_unsafe(Tensor indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool? is_coalesced=None) -> Tensor";
+  static at::Tensor call(const at::Tensor & indices, const at::Tensor & values, c10::SymIntArrayRef size, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory, ::std::optional<bool> is_coalesced);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & indices, const at::Tensor & values, c10::SymIntArrayRef size, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory, ::std::optional<bool> is_coalesced);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims.h
new file mode 100644
index 0000000000000000000000000000000000000000..81c7d12e02e733174c1215b1254fa0eca837dd0d
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims.h
@@ -0,0 +1,44 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <optional>
+
+
+
+#include <ATen/ops/_sparse_coo_tensor_with_dims_ops.h>
+
+namespace at {
+
+
+// aten::_sparse_coo_tensor_with_dims(int sparse_dim, int dense_dim, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+inline at::Tensor _sparse_coo_tensor_with_dims(int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, at::TensorOptions options) {
+    return at::_ops::_sparse_coo_tensor_with_dims::call(sparse_dim, dense_dim, size, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt());
+}
+// aten::_sparse_coo_tensor_with_dims(int sparse_dim, int dense_dim, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+inline at::Tensor _sparse_coo_tensor_with_dims(int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory) {
+    return at::_ops::_sparse_coo_tensor_with_dims::call(sparse_dim, dense_dim, size, dtype, layout, device, pin_memory);
+}
+
+// aten::_sparse_coo_tensor_with_dims.out(int sparse_dim, int dense_dim, int[] size, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _sparse_coo_tensor_with_dims_out(at::Tensor & out, int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size) {
+    return at::_ops::_sparse_coo_tensor_with_dims_out::call(sparse_dim, dense_dim, size, out);
+}
+// aten::_sparse_coo_tensor_with_dims.out(int sparse_dim, int dense_dim, int[] size, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _sparse_coo_tensor_with_dims_outf(int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, at::Tensor & out) {
+    return at::_ops::_sparse_coo_tensor_with_dims_out::call(sparse_dim, dense_dim, size, out);
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_and_tensors.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_and_tensors.h
new file mode 100644
index 0000000000000000000000000000000000000000..3f67ab5d9ec4d28e081448ae43443394769b17f7
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_and_tensors.h
@@ -0,0 +1,114 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <optional>
+
+
+
+#include <ATen/ops/_sparse_coo_tensor_with_dims_and_tensors_ops.h>
+
+namespace at {
+
+
+// aten::_sparse_coo_tensor_with_dims_and_tensors(int sparse_dim, int dense_dim, SymInt[] size, Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False, bool? is_coalesced=None) -> Tensor
+inline at::Tensor _sparse_coo_tensor_with_dims_and_tensors(int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, const at::Tensor & indices, const at::Tensor & values, at::TensorOptions options, ::std::optional<bool> is_coalesced=::std::nullopt) {
+    return at::_ops::_sparse_coo_tensor_with_dims_and_tensors::call(sparse_dim, dense_dim, c10::fromIntArrayRefSlow(size), indices, values, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), is_coalesced);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor _sparse_coo_tensor_with_dims_and_tensors(int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, const at::Tensor & indices, const at::Tensor & values, at::TensorOptions options, ::std::optional<bool> is_coalesced=::std::nullopt) {
+    return at::_ops::_sparse_coo_tensor_with_dims_and_tensors::call(sparse_dim, dense_dim, c10::fromIntArrayRefSlow(size), indices, values, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), is_coalesced);
+  }
+}
+
+// aten::_sparse_coo_tensor_with_dims_and_tensors(int sparse_dim, int dense_dim, SymInt[] size, Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False, bool? is_coalesced=None) -> Tensor
+inline at::Tensor _sparse_coo_tensor_with_dims_and_tensors(int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory, ::std::optional<bool> is_coalesced) {
+    return at::_ops::_sparse_coo_tensor_with_dims_and_tensors::call(sparse_dim, dense_dim, c10::fromIntArrayRefSlow(size), indices, values, dtype, layout, device, pin_memory, is_coalesced);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor _sparse_coo_tensor_with_dims_and_tensors(int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory, ::std::optional<bool> is_coalesced) {
+    return at::_ops::_sparse_coo_tensor_with_dims_and_tensors::call(sparse_dim, dense_dim, c10::fromIntArrayRefSlow(size), indices, values, dtype, layout, device, pin_memory, is_coalesced);
+  }
+}
+
+// aten::_sparse_coo_tensor_with_dims_and_tensors(int sparse_dim, int dense_dim, SymInt[] size, Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False, bool? is_coalesced=None) -> Tensor
+inline at::Tensor _sparse_coo_tensor_with_dims_and_tensors_symint(int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const at::Tensor & indices, const at::Tensor & values, at::TensorOptions options, ::std::optional<bool> is_coalesced=::std::nullopt) {
+    return at::_ops::_sparse_coo_tensor_with_dims_and_tensors::call(sparse_dim, dense_dim, size, indices, values, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), is_coalesced);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor _sparse_coo_tensor_with_dims_and_tensors(int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const at::Tensor & indices, const at::Tensor & values, at::TensorOptions options, ::std::optional<bool> is_coalesced=::std::nullopt) {
+    return at::_ops::_sparse_coo_tensor_with_dims_and_tensors::call(sparse_dim, dense_dim, size, indices, values, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), is_coalesced);
+  }
+}
+
+// aten::_sparse_coo_tensor_with_dims_and_tensors(int sparse_dim, int dense_dim, SymInt[] size, Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False, bool? is_coalesced=None) -> Tensor
+inline at::Tensor _sparse_coo_tensor_with_dims_and_tensors_symint(int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory, ::std::optional<bool> is_coalesced) {
+    return at::_ops::_sparse_coo_tensor_with_dims_and_tensors::call(sparse_dim, dense_dim, size, indices, values, dtype, layout, device, pin_memory, is_coalesced);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor _sparse_coo_tensor_with_dims_and_tensors(int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory, ::std::optional<bool> is_coalesced) {
+    return at::_ops::_sparse_coo_tensor_with_dims_and_tensors::call(sparse_dim, dense_dim, size, indices, values, dtype, layout, device, pin_memory, is_coalesced);
+  }
+}
+
+// aten::_sparse_coo_tensor_with_dims_and_tensors.out(int sparse_dim, int dense_dim, SymInt[] size, Tensor indices, Tensor values, *, bool? is_coalesced=None, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _sparse_coo_tensor_with_dims_and_tensors_out(at::Tensor & out, int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<bool> is_coalesced=::std::nullopt) {
+    return at::_ops::_sparse_coo_tensor_with_dims_and_tensors_out::call(sparse_dim, dense_dim, c10::fromIntArrayRefSlow(size), indices, values, is_coalesced, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor & _sparse_coo_tensor_with_dims_and_tensors_out(at::Tensor & out, int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<bool> is_coalesced=::std::nullopt) {
+    return at::_ops::_sparse_coo_tensor_with_dims_and_tensors_out::call(sparse_dim, dense_dim, c10::fromIntArrayRefSlow(size), indices, values, is_coalesced, out);
+  }
+}
+
+// aten::_sparse_coo_tensor_with_dims_and_tensors.out(int sparse_dim, int dense_dim, SymInt[] size, Tensor indices, Tensor values, *, bool? is_coalesced=None, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _sparse_coo_tensor_with_dims_and_tensors_outf(int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<bool> is_coalesced, at::Tensor & out) {
+    return at::_ops::_sparse_coo_tensor_with_dims_and_tensors_out::call(sparse_dim, dense_dim, c10::fromIntArrayRefSlow(size), indices, values, is_coalesced, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor & _sparse_coo_tensor_with_dims_and_tensors_outf(int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<bool> is_coalesced, at::Tensor & out) {
+    return at::_ops::_sparse_coo_tensor_with_dims_and_tensors_out::call(sparse_dim, dense_dim, c10::fromIntArrayRefSlow(size), indices, values, is_coalesced, out);
+  }
+}
+
+// aten::_sparse_coo_tensor_with_dims_and_tensors.out(int sparse_dim, int dense_dim, SymInt[] size, Tensor indices, Tensor values, *, bool? is_coalesced=None, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _sparse_coo_tensor_with_dims_and_tensors_symint_out(at::Tensor & out, int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<bool> is_coalesced=::std::nullopt) {
+    return at::_ops::_sparse_coo_tensor_with_dims_and_tensors_out::call(sparse_dim, dense_dim, size, indices, values, is_coalesced, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor & _sparse_coo_tensor_with_dims_and_tensors_out(at::Tensor & out, int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<bool> is_coalesced=::std::nullopt) {
+    return at::_ops::_sparse_coo_tensor_with_dims_and_tensors_out::call(sparse_dim, dense_dim, size, indices, values, is_coalesced, out);
+  }
+}
+
+// aten::_sparse_coo_tensor_with_dims_and_tensors.out(int sparse_dim, int dense_dim, SymInt[] size, Tensor indices, Tensor values, *, bool? is_coalesced=None, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _sparse_coo_tensor_with_dims_and_tensors_symint_outf(int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<bool> is_coalesced, at::Tensor & out) {
+    return at::_ops::_sparse_coo_tensor_with_dims_and_tensors_out::call(sparse_dim, dense_dim, size, indices, values, is_coalesced, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor & _sparse_coo_tensor_with_dims_and_tensors_outf(int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<bool> is_coalesced, at::Tensor & out) {
+    return at::_ops::_sparse_coo_tensor_with_dims_and_tensors_out::call(sparse_dim, dense_dim, size, indices, values, is_coalesced, out);
+  }
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_and_tensors_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_and_tensors_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..8f4ae47ae74a052593429dfd5f78560b1961b9a5
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_and_tensors_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,26 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API at::Tensor & _sparse_coo_tensor_with_dims_and_tensors_out(at::Tensor & out, int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<bool> is_coalesced=::std::nullopt);
+TORCH_API at::Tensor & _sparse_coo_tensor_with_dims_and_tensors_outf(int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<bool> is_coalesced, at::Tensor & out);
+TORCH_API at::Tensor & _sparse_coo_tensor_with_dims_and_tensors_symint_out(at::Tensor & out, int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<bool> is_coalesced=::std::nullopt);
+TORCH_API at::Tensor & _sparse_coo_tensor_with_dims_and_tensors_symint_outf(int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<bool> is_coalesced, at::Tensor & out);
+
+} // namespace compositeexplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_and_tensors_meta_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_and_tensors_meta_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..87cde9a334f4a660d3eceb7b90f762427f6622e7
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_and_tensors_meta_dispatch.h
@@ -0,0 +1,26 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace meta {
+
+TORCH_API at::Tensor _sparse_coo_tensor_with_dims_and_tensors(int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, const at::Tensor & indices, const at::Tensor & values, at::TensorOptions options, ::std::optional<bool> is_coalesced=::std::nullopt);
+TORCH_API at::Tensor _sparse_coo_tensor_with_dims_and_tensors(int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory, ::std::optional<bool> is_coalesced);
+TORCH_API at::Tensor _sparse_coo_tensor_with_dims_and_tensors_symint(int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const at::Tensor & indices, const at::Tensor & values, at::TensorOptions options, ::std::optional<bool> is_coalesced=::std::nullopt);
+TORCH_API at::Tensor _sparse_coo_tensor_with_dims_and_tensors_symint(int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory, ::std::optional<bool> is_coalesced);
+
+} // namespace meta
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_and_tensors_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_and_tensors_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..c99e79e7cc3104e20d45b4c46b777208b5bf36ef
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_and_tensors_native.h
@@ -0,0 +1,22 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <optional>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor & _sparse_coo_tensor_with_dims_and_tensors_out_symint(int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<bool> is_coalesced, at::Tensor & out);
+TORCH_API at::Tensor new_with_dims_and_tensor_sparse_symint(int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<at::ScalarType> dtype={}, ::std::optional<at::Layout> layout={}, ::std::optional<at::Device> device={}, ::std::optional<bool> pin_memory={}, ::std::optional<bool> is_coalesced=::std::nullopt);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_and_tensors_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_and_tensors_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..c1c4f753c36b859298a6c62e4bd60697d148fb55
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_and_tensors_ops.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+#include <optional>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _sparse_coo_tensor_with_dims_and_tensors {
+  using schema = at::Tensor (int64_t, int64_t, c10::SymIntArrayRef, const at::Tensor &, const at::Tensor &, ::std::optional<at::ScalarType>, ::std::optional<at::Layout>, ::std::optional<at::Device>, ::std::optional<bool>, ::std::optional<bool>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_sparse_coo_tensor_with_dims_and_tensors";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_sparse_coo_tensor_with_dims_and_tensors(int sparse_dim, int dense_dim, SymInt[] size, Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False, bool? is_coalesced=None) -> Tensor";
+  static at::Tensor call(int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory, ::std::optional<bool> is_coalesced);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory, ::std::optional<bool> is_coalesced);
+};
+
+struct TORCH_API _sparse_coo_tensor_with_dims_and_tensors_out {
+  using schema = at::Tensor & (int64_t, int64_t, c10::SymIntArrayRef, const at::Tensor &, const at::Tensor &, ::std::optional<bool>, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_sparse_coo_tensor_with_dims_and_tensors";
+  static constexpr const char* overload_name = "out";
+  static constexpr const char* schema_str = "_sparse_coo_tensor_with_dims_and_tensors.out(int sparse_dim, int dense_dim, SymInt[] size, Tensor indices, Tensor values, *, bool? is_coalesced=None, Tensor(a!) out) -> Tensor(a!)";
+  static at::Tensor & call(int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<bool> is_coalesced, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, int64_t sparse_dim, int64_t dense_dim, c10::SymIntArrayRef size, const at::Tensor & indices, const at::Tensor & values, ::std::optional<bool> is_coalesced, at::Tensor & out);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..2bf218e635cb3e9dc50bb631d52d8ace31b39f19
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API at::Tensor & _sparse_coo_tensor_with_dims_out(at::Tensor & out, int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size);
+TORCH_API at::Tensor & _sparse_coo_tensor_with_dims_outf(int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, at::Tensor & out);
+
+} // namespace compositeexplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_meta_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_meta_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..9c42da75fac271a0f66c17e7b9d80d7a88bec7b3
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_meta_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace meta {
+
+TORCH_API at::Tensor _sparse_coo_tensor_with_dims(int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, at::TensorOptions options);
+TORCH_API at::Tensor _sparse_coo_tensor_with_dims(int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory);
+
+} // namespace meta
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..3a466ddf58ee275d67f8049ffd0e7d7939bbba79
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_native.h
@@ -0,0 +1,22 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <optional>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor & _sparse_coo_tensor_with_dims_out(int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, at::Tensor & out);
+TORCH_API at::Tensor new_with_dims_sparse(int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, ::std::optional<at::ScalarType> dtype={}, ::std::optional<at::Layout> layout={}, ::std::optional<at::Device> device={}, ::std::optional<bool> pin_memory={});
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..e25e03055b6b6540c4ecf508496bdab356544f98
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_with_dims_ops.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+#include <optional>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _sparse_coo_tensor_with_dims {
+  using schema = at::Tensor (int64_t, int64_t, at::IntArrayRef, ::std::optional<at::ScalarType>, ::std::optional<at::Layout>, ::std::optional<at::Device>, ::std::optional<bool>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_sparse_coo_tensor_with_dims";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_sparse_coo_tensor_with_dims(int sparse_dim, int dense_dim, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor";
+  static at::Tensor call(int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory);
+};
+
+struct TORCH_API _sparse_coo_tensor_with_dims_out {
+  using schema = at::Tensor & (int64_t, int64_t, at::IntArrayRef, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_sparse_coo_tensor_with_dims";
+  static constexpr const char* overload_name = "out";
+  static constexpr const char* schema_str = "_sparse_coo_tensor_with_dims.out(int sparse_dim, int dense_dim, int[] size, *, Tensor(a!) out) -> Tensor(a!)";
+  static at::Tensor & call(int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, int64_t sparse_dim, int64_t dense_dim, at::IntArrayRef size, at::Tensor & out);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csc_tensor_unsafe.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csc_tensor_unsafe.h
new file mode 100644
index 0000000000000000000000000000000000000000..1eecf3c8191f843b85a0eb4e91f4667a4f6a9646
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csc_tensor_unsafe.h
@@ -0,0 +1,35 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <optional>
+
+
+
+#include <ATen/ops/_sparse_csc_tensor_unsafe_ops.h>
+
+namespace at {
+
+
+// aten::_sparse_csc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+inline at::Tensor _sparse_csc_tensor_unsafe(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, at::TensorOptions options={}) {
+    return at::_ops::_sparse_csc_tensor_unsafe::call(ccol_indices, row_indices, values, size, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt());
+}
+// aten::_sparse_csc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+inline at::Tensor _sparse_csc_tensor_unsafe(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory) {
+    return at::_ops::_sparse_csc_tensor_unsafe::call(ccol_indices, row_indices, values, size, dtype, layout, device, pin_memory);
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csc_tensor_unsafe_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csc_tensor_unsafe_compositeimplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..0efff3ce40b3f7d282727b4b4843e069258ff81e
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csc_tensor_unsafe_compositeimplicitautograd_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeimplicitautograd {
+
+TORCH_API at::Tensor _sparse_csc_tensor_unsafe(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, at::TensorOptions options={});
+TORCH_API at::Tensor _sparse_csc_tensor_unsafe(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory);
+
+} // namespace compositeimplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csc_tensor_unsafe_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csc_tensor_unsafe_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..f786c3703693cbd46c8ea18ef4f5e69422752420
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csc_tensor_unsafe_native.h
@@ -0,0 +1,21 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <optional>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor _sparse_csc_tensor_unsafe(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional<at::ScalarType> dtype={}, ::std::optional<at::Layout> layout={}, ::std::optional<at::Device> device={}, ::std::optional<bool> pin_memory={});
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csc_tensor_unsafe_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csc_tensor_unsafe_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..cd2efeaf961fc1642c31e90ad3bb186fb964a2bb
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csc_tensor_unsafe_ops.h
@@ -0,0 +1,29 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+#include <optional>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _sparse_csc_tensor_unsafe {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, at::IntArrayRef, ::std::optional<at::ScalarType>, ::std::optional<at::Layout>, ::std::optional<at::Device>, ::std::optional<bool>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_sparse_csc_tensor_unsafe";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_sparse_csc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor";
+  static at::Tensor call(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_prod.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_prod.h
new file mode 100644
index 0000000000000000000000000000000000000000..0dd3e66ff58389471062623168629b289057685c
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_prod.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <optional>
+
+
+
+#include <ATen/ops/_sparse_csr_prod_ops.h>
+
+namespace at {
+
+
+// aten::_sparse_csr_prod.dim_dtype(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
+inline at::Tensor _sparse_csr_prod(const at::Tensor & self, at::IntArrayRef dim, bool keepdim=false, ::std::optional<at::ScalarType> dtype=::std::nullopt) {
+    return at::_ops::_sparse_csr_prod_dim_dtype::call(self, dim, keepdim, dtype);
+}
+
+// aten::_sparse_csr_prod.dim_dtype_out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _sparse_csr_prod_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef dim, bool keepdim=false, ::std::optional<at::ScalarType> dtype=::std::nullopt) {
+    return at::_ops::_sparse_csr_prod_dim_dtype_out::call(self, dim, keepdim, dtype, out);
+}
+// aten::_sparse_csr_prod.dim_dtype_out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _sparse_csr_prod_outf(const at::Tensor & self, at::IntArrayRef dim, bool keepdim, ::std::optional<at::ScalarType> dtype, at::Tensor & out) {
+    return at::_ops::_sparse_csr_prod_dim_dtype_out::call(self, dim, keepdim, dtype, out);
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_prod_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_prod_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..6a4e0e68a1687c7b6fe3ea55ce5d5a9e940de82b
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_prod_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API at::Tensor & _sparse_csr_prod_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef dim, bool keepdim=false, ::std::optional<at::ScalarType> dtype=::std::nullopt);
+TORCH_API at::Tensor & _sparse_csr_prod_outf(const at::Tensor & self, at::IntArrayRef dim, bool keepdim, ::std::optional<at::ScalarType> dtype, at::Tensor & out);
+
+} // namespace compositeexplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_prod_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_prod_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..4d83f389aeae5ea9fe6e7849a75a22b6e49fe5b2
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_prod_native.h
@@ -0,0 +1,23 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <optional>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor & _sparse_csr_prod_dim_dtype_out(const at::Tensor & self, at::IntArrayRef dim, bool keepdim, ::std::optional<at::ScalarType> dtype, at::Tensor & out);
+TORCH_API at::Tensor _sparse_csr_prod_cpu(const at::Tensor & self, at::IntArrayRef dim, bool keepdim=false, ::std::optional<at::ScalarType> dtype=::std::nullopt);
+TORCH_API at::Tensor _sparse_csr_prod_cuda(const at::Tensor & self, at::IntArrayRef dim, bool keepdim=false, ::std::optional<at::ScalarType> dtype=::std::nullopt);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_prod_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_prod_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..9adc01b4d67c33b514b437ec67fd4e7f78340b6f
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_prod_ops.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+#include <optional>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _sparse_csr_prod_dim_dtype {
+  using schema = at::Tensor (const at::Tensor &, at::IntArrayRef, bool, ::std::optional<at::ScalarType>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_sparse_csr_prod";
+  static constexpr const char* overload_name = "dim_dtype";
+  static constexpr const char* schema_str = "_sparse_csr_prod.dim_dtype(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor";
+  static at::Tensor call(const at::Tensor & self, at::IntArrayRef dim, bool keepdim, ::std::optional<at::ScalarType> dtype);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef dim, bool keepdim, ::std::optional<at::ScalarType> dtype);
+};
+
+struct TORCH_API _sparse_csr_prod_dim_dtype_out {
+  using schema = at::Tensor & (const at::Tensor &, at::IntArrayRef, bool, ::std::optional<at::ScalarType>, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_sparse_csr_prod";
+  static constexpr const char* overload_name = "dim_dtype_out";
+  static constexpr const char* schema_str = "_sparse_csr_prod.dim_dtype_out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)";
+  static at::Tensor & call(const at::Tensor & self, at::IntArrayRef dim, bool keepdim, ::std::optional<at::ScalarType> dtype, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef dim, bool keepdim, ::std::optional<at::ScalarType> dtype, at::Tensor & out);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_sum.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_sum.h
new file mode 100644
index 0000000000000000000000000000000000000000..948ed57bae41a63488db522601d943cee0f29ede
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_sum.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <optional>
+
+
+
+#include <ATen/ops/_sparse_csr_sum_ops.h>
+
+namespace at {
+
+
+// aten::_sparse_csr_sum.dim_dtype(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
+inline at::Tensor _sparse_csr_sum(const at::Tensor & self, at::IntArrayRef dim, bool keepdim=false, ::std::optional<at::ScalarType> dtype=::std::nullopt) {
+    return at::_ops::_sparse_csr_sum_dim_dtype::call(self, dim, keepdim, dtype);
+}
+
+// aten::_sparse_csr_sum.dim_dtype_out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _sparse_csr_sum_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef dim, bool keepdim=false, ::std::optional<at::ScalarType> dtype=::std::nullopt) {
+    return at::_ops::_sparse_csr_sum_dim_dtype_out::call(self, dim, keepdim, dtype, out);
+}
+// aten::_sparse_csr_sum.dim_dtype_out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _sparse_csr_sum_outf(const at::Tensor & self, at::IntArrayRef dim, bool keepdim, ::std::optional<at::ScalarType> dtype, at::Tensor & out) {
+    return at::_ops::_sparse_csr_sum_dim_dtype_out::call(self, dim, keepdim, dtype, out);
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_sum_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_sum_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..13d0e04f98cfe238eaedfa40867e48cc83a5575e
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_sum_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API at::Tensor & _sparse_csr_sum_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef dim, bool keepdim=false, ::std::optional<at::ScalarType> dtype=::std::nullopt);
+TORCH_API at::Tensor & _sparse_csr_sum_outf(const at::Tensor & self, at::IntArrayRef dim, bool keepdim, ::std::optional<at::ScalarType> dtype, at::Tensor & out);
+
+} // namespace compositeexplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_sum_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_sum_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..4f33d0b37127c94a968084d34046134c4e3c6b4c
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_sum_native.h
@@ -0,0 +1,23 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <optional>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor & _sparse_csr_sum_dim_dtype_out(const at::Tensor & self, at::IntArrayRef dim, bool keepdim, ::std::optional<at::ScalarType> dtype, at::Tensor & out);
+TORCH_API at::Tensor _sparse_csr_sum_cpu(const at::Tensor & self, at::IntArrayRef dim, bool keepdim=false, ::std::optional<at::ScalarType> dtype=::std::nullopt);
+TORCH_API at::Tensor _sparse_csr_sum_cuda(const at::Tensor & self, at::IntArrayRef dim, bool keepdim=false, ::std::optional<at::ScalarType> dtype=::std::nullopt);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_sum_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_sum_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..d55860dd25401f248e0e96f7877919682e68cdb4
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_sum_ops.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+#include <optional>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _sparse_csr_sum_dim_dtype {
+  using schema = at::Tensor (const at::Tensor &, at::IntArrayRef, bool, ::std::optional<at::ScalarType>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_sparse_csr_sum";
+  static constexpr const char* overload_name = "dim_dtype";
+  static constexpr const char* schema_str = "_sparse_csr_sum.dim_dtype(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor";
+  static at::Tensor call(const at::Tensor & self, at::IntArrayRef dim, bool keepdim, ::std::optional<at::ScalarType> dtype);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef dim, bool keepdim, ::std::optional<at::ScalarType> dtype);
+};
+
+struct TORCH_API _sparse_csr_sum_dim_dtype_out {
+  using schema = at::Tensor & (const at::Tensor &, at::IntArrayRef, bool, ::std::optional<at::ScalarType>, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_sparse_csr_sum";
+  static constexpr const char* overload_name = "dim_dtype_out";
+  static constexpr const char* schema_str = "_sparse_csr_sum.dim_dtype_out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)";
+  static at::Tensor & call(const at::Tensor & self, at::IntArrayRef dim, bool keepdim, ::std::optional<at::ScalarType> dtype, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef dim, bool keepdim, ::std::optional<at::ScalarType> dtype, at::Tensor & out);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_tensor_unsafe.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_tensor_unsafe.h
new file mode 100644
index 0000000000000000000000000000000000000000..5a8e5707be1fe51ceffb58efb9f889ce4d785851
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_tensor_unsafe.h
@@ -0,0 +1,35 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <optional>
+
+
+
+#include <ATen/ops/_sparse_csr_tensor_unsafe_ops.h>
+
+namespace at {
+
+
+// aten::_sparse_csr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+inline at::Tensor _sparse_csr_tensor_unsafe(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, at::TensorOptions options={}) {
+    return at::_ops::_sparse_csr_tensor_unsafe::call(crow_indices, col_indices, values, size, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt());
+}
+// aten::_sparse_csr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+inline at::Tensor _sparse_csr_tensor_unsafe(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory) {
+    return at::_ops::_sparse_csr_tensor_unsafe::call(crow_indices, col_indices, values, size, dtype, layout, device, pin_memory);
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_tensor_unsafe_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_tensor_unsafe_compositeimplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..f2c4e64584591a6d9e654dd341fc578609613b6d
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_tensor_unsafe_compositeimplicitautograd_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeimplicitautograd {
+
+TORCH_API at::Tensor _sparse_csr_tensor_unsafe(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, at::TensorOptions options={});
+TORCH_API at::Tensor _sparse_csr_tensor_unsafe(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory);
+
+} // namespace compositeimplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_tensor_unsafe_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_tensor_unsafe_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..b2426c28cef24b644cd5139c9b948e7d12bd4402
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_tensor_unsafe_native.h
@@ -0,0 +1,21 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <optional>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor _sparse_csr_tensor_unsafe(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional<at::ScalarType> dtype={}, ::std::optional<at::Layout> layout={}, ::std::optional<at::Device> device={}, ::std::optional<bool> pin_memory={});
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_tensor_unsafe_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_tensor_unsafe_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..81676f8c3568b5bdac2d94b8f7e32fafde9fe436
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_csr_tensor_unsafe_ops.h
@@ -0,0 +1,29 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <tuple>
+#include <vector>
+#include <optional>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_csr_tensor_unsafe { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, at::IntArrayRef, ::std::optional, ::std::optional, ::std::optional, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_csr_tensor_unsafe"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_csr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor"; + static at::Tensor call(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax.h new file mode 100644 index 0000000000000000000000000000000000000000..0dd8944ee35d4ccde70a32e052cda171465bdcf0 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax.h @@ -0,0 +1,50 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor +inline at::Tensor _sparse_log_softmax(const at::Tensor & self, int64_t dim, ::std::optional dtype=::std::nullopt) { + return at::_ops::_sparse_log_softmax_int::call(self, dim, dtype); +} + +// aten::_sparse_log_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor _sparse_log_softmax(const at::Tensor & self, at::Dimname dim, ::std::optional dtype=::std::nullopt) { + return at::_ops::_sparse_log_softmax_Dimname::call(self, dim, dtype); +} + +// aten::_sparse_log_softmax(Tensor self, int dim, bool half_to_float) -> Tensor +inline at::Tensor _sparse_log_softmax(const at::Tensor & self, int64_t dim, bool half_to_float) { + return at::_ops::_sparse_log_softmax::call(self, dim, half_to_float); +} + +// aten::_sparse_log_softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _sparse_log_softmax_out(at::Tensor & out, const at::Tensor & self, int64_t dim, bool half_to_float) { + return at::_ops::_sparse_log_softmax_out::call(self, dim, half_to_float, out); +} +// aten::_sparse_log_softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & _sparse_log_softmax_outf(const at::Tensor & self, int64_t dim, bool half_to_float, at::Tensor & out) { + return at::_ops::_sparse_log_softmax_out::call(self, dim, half_to_float, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data.h new file mode 100644 index 0000000000000000000000000000000000000000..382bd78c60c86142145e041fe35eaf217292d59f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor +inline at::Tensor _sparse_log_softmax_backward_data(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self) { + return at::_ops::_sparse_log_softmax_backward_data::call(grad_output, output, dim, self); +} + +// aten::_sparse_log_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _sparse_log_softmax_backward_data_out(at::Tensor & out, const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self) { + return at::_ops::_sparse_log_softmax_backward_data_out::call(grad_output, output, dim, self, out); +} +// aten::_sparse_log_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _sparse_log_softmax_backward_data_outf(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self, at::Tensor & out) { + return at::_ops::_sparse_log_softmax_backward_data_out::call(grad_output, output, dim, self, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8358bfd66619074644815589af2090561b5f3795 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
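The `_out`/`_outf` pair above follows torchgen's naming convention: `_out` puts the output tensor first (the ergonomic C++ order), while `_outf` keeps the schema order with `out` trailing. A minimal sketch; both spellings resolve to the same underlying `aten::_sparse_log_softmax.out` operator:

```cpp
#include <ATen/ATen.h>

void log_softmax_out_demo(const at::Tensor& self, at::Tensor& out) {
  // Output-first spelling...
  at::_sparse_log_softmax_out(out, self, /*dim=*/0, /*half_to_float=*/false);
  // ...and the schema-order spelling; identical behavior.
  at::_sparse_log_softmax_outf(self, /*dim=*/0, /*half_to_float=*/false, out);
}
```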
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _sparse_log_softmax_backward_data_out(at::Tensor & out, const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self); +TORCH_API at::Tensor & _sparse_log_softmax_backward_data_outf(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data_native.h new file mode 100644 index 0000000000000000000000000000000000000000..f236830ae42e7635fa96e43837da689962990276 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data_native.h @@ -0,0 +1,23 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _sparse_log_softmax_backward_data_out(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor log_softmax_backward_sparse_cpu(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self); +TORCH_API at::Tensor log_softmax_backward_sparse_cuda(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..17c88812d168ee0c5d795ccc8b4a8032e9d191e1 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_backward_data_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
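The native header above splits the backward kernel per backend (`log_softmax_backward_sparse_cpu` / `log_softmax_backward_sparse_cuda`); callers go through the functional entry point and let dispatch pick the device kernel. Illustrative sketch only:

```cpp
#include <ATen/ATen.h>

// Dispatch selects the CPU or CUDA sparse backward kernel from the
// device of the arguments; the caller never names the backend.
at::Tensor backward_demo(const at::Tensor& grad_output,
                         const at::Tensor& output,
                         const at::Tensor& self) {
  return at::_sparse_log_softmax_backward_data(grad_output, output,
                                               /*dim=*/0, self);
}
```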
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_log_softmax_backward_data { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, int64_t, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_log_softmax_backward_data"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self); +}; + +struct TORCH_API _sparse_log_softmax_backward_data_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, int64_t, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_log_softmax_backward_data"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_sparse_log_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..28e35430cf2b79490e8abdd824dd9fc6d85a7b05 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
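The `name`/`overload_name` constants in these generated structs are exactly the keys the dispatcher uses, so they can drive a runtime lookup. A minimal sketch, assuming the standard dispatcher header is available:

```cpp
#include <ATen/core/dispatch/Dispatcher.h>
#include <ATen/ops/_sparse_log_softmax_backward_data_ops.h>

void lookup_demo() {
  // Resolves "aten::_sparse_log_softmax_backward_data" (overload "").
  auto handle = c10::Dispatcher::singleton().findSchemaOrThrow(
      at::_ops::_sparse_log_softmax_backward_data::name,
      at::_ops::_sparse_log_softmax_backward_data::overload_name);
  (void)handle;
}
```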
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _sparse_log_softmax_out(at::Tensor & out, const at::Tensor & self, int64_t dim, bool half_to_float); +TORCH_API at::Tensor & _sparse_log_softmax_outf(const at::Tensor & self, int64_t dim, bool half_to_float, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..19ac50e791ce7513def4e36cea828928c62c7593 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_compositeimplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _sparse_log_softmax(const at::Tensor & self, int64_t dim, ::std::optional dtype=::std::nullopt); +TORCH_API at::Tensor _sparse_log_softmax(const at::Tensor & self, at::Dimname dim, ::std::optional dtype=::std::nullopt); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_native.h new file mode 100644 index 0000000000000000000000000000000000000000..15212e73f189df9d53bba19f41017681d8db97ef --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_native.h @@ -0,0 +1,25 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _sparse_log_softmax(const at::Tensor & self, int64_t dim, ::std::optional dtype=::std::nullopt); +TORCH_API at::Tensor _sparse_log_softmax(const at::Tensor & self, at::Dimname dim, ::std::optional dtype=::std::nullopt); +TORCH_API at::Tensor & _sparse_log_softmax_out(const at::Tensor & self, int64_t dim, bool half_to_float, at::Tensor & out); +TORCH_API at::Tensor log_softmax_sparse_cpu(const at::Tensor & self, int64_t dim, bool half_to_float); +TORCH_API at::Tensor log_softmax_sparse_cuda(const at::Tensor & self, int64_t dim, bool half_to_float); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..8023761972a059ce27313d75e4d4281a02ac5af3 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_log_softmax_ops.h @@ -0,0 +1,62 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _sparse_log_softmax_int {
+  using schema = at::Tensor (const at::Tensor &, int64_t, ::std::optional<at::ScalarType>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_sparse_log_softmax";
+  static constexpr const char* overload_name = "int";
+  static constexpr const char* schema_str = "_sparse_log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor";
+  static at::Tensor call(const at::Tensor & self, int64_t dim, ::std::optional<at::ScalarType> dtype);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t dim, ::std::optional<at::ScalarType> dtype);
+};
+
+struct TORCH_API _sparse_log_softmax_Dimname {
+  using schema = at::Tensor (const at::Tensor &, at::Dimname, ::std::optional<at::ScalarType>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_sparse_log_softmax";
+  static constexpr const char* overload_name = "Dimname";
+  static constexpr const char* schema_str = "_sparse_log_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor";
+  static at::Tensor call(const at::Tensor & self, at::Dimname dim, ::std::optional<at::ScalarType> dtype);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Dimname dim, ::std::optional<at::ScalarType> dtype);
+};
+
+struct TORCH_API _sparse_log_softmax {
+  using schema = at::Tensor (const at::Tensor &, int64_t, bool);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_sparse_log_softmax";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_sparse_log_softmax(Tensor self, int dim, bool half_to_float) -> Tensor";
+  static at::Tensor call(const at::Tensor & self, int64_t dim, bool half_to_float);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t dim, bool half_to_float);
+};
+
+struct TORCH_API _sparse_log_softmax_out {
+  using schema = at::Tensor & (const at::Tensor &, int64_t, bool, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_sparse_log_softmax";
+  static constexpr const char* overload_name = "out";
+  static constexpr const char* schema_str = "_sparse_log_softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!)";
+  static at::Tensor & call(const at::Tensor & self, int64_t dim, bool half_to_float, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t dim, bool half_to_float, at::Tensor & out);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mask_projection.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mask_projection.h
new file mode 100644
index 0000000000000000000000000000000000000000..59a277197dcb1b09508559a9e0bf07e9e9a85618
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mask_projection.h
@@ -0,0 +1,35 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+
+#include
+
+namespace at {
+
+
+// aten::_sparse_mask_projection.out(Tensor self, Tensor mask, bool accumulate_matches=False, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _sparse_mask_projection_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & mask, bool accumulate_matches=false) {
+    return at::_ops::_sparse_mask_projection_out::call(self, mask, accumulate_matches, out);
+}
+// aten::_sparse_mask_projection.out(Tensor self, Tensor mask, bool accumulate_matches=False, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _sparse_mask_projection_outf(const at::Tensor & self, const at::Tensor & mask, bool accumulate_matches, at::Tensor & out) {
+    return at::_ops::_sparse_mask_projection_out::call(self, mask, accumulate_matches, out);
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mask_projection_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mask_projection_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..12af3c49c6ef3394db02fd1ae13bc140e4e73ca5
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mask_projection_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
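Taken together, the structs above give `_sparse_log_softmax` three call forms: the `.int` and `.Dimname` overloads with an optional dtype, plus the raw `half_to_float` entry point. A hedged sketch of how the C++ overloads resolve:

```cpp
#include <ATen/ATen.h>

void overload_demo(const at::Tensor& sparse_input) {
  at::Tensor a = at::_sparse_log_softmax(sparse_input, /*dim=*/0);             // .int, dtype defaulted
  at::Tensor b = at::_sparse_log_softmax(sparse_input, /*dim=*/0, at::kFloat); // .int with dtype
  at::Tensor c = at::_sparse_log_softmax(sparse_input, /*dim=*/0,
                                         /*half_to_float=*/false);             // raw entry point
  (void)a; (void)b; (void)c;
}
```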
+#include
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API at::Tensor & _sparse_mask_projection_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & mask, bool accumulate_matches=false);
+TORCH_API at::Tensor & _sparse_mask_projection_outf(const at::Tensor & self, const at::Tensor & mask, bool accumulate_matches, at::Tensor & out);
+
+} // namespace compositeexplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mask_projection_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mask_projection_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..581aadfff3a60815bc4985c26224e26ea170d1b2
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mask_projection_native.h
@@ -0,0 +1,22 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor & _sparse_mask_projection_out(const at::Tensor & self, const at::Tensor & mask, bool accumulate_matches, at::Tensor & out);
+TORCH_API at::Tensor sparse_mask_projection(const at::Tensor & self, const at::Tensor & mask, bool accumulate_matches=false);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mask_projection_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mask_projection_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..25e3096647520a9c54ecf95125dba10db4fa74f3
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mask_projection_ops.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _sparse_mask_projection {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &, bool);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_sparse_mask_projection";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_sparse_mask_projection(Tensor self, Tensor mask, bool accumulate_matches=False) -> Tensor";
+  static at::Tensor call(const at::Tensor & self, const at::Tensor & mask, bool accumulate_matches);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & mask, bool accumulate_matches);
+};
+
+struct TORCH_API _sparse_mask_projection_out {
+  using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, bool, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_sparse_mask_projection";
+  static constexpr const char* overload_name = "out";
+  static constexpr const char* schema_str = "_sparse_mask_projection.out(Tensor self, Tensor mask, bool accumulate_matches=False, *, Tensor(a!) out) -> Tensor(a!)";
+  static at::Tensor & call(const at::Tensor & self, const at::Tensor & mask, bool accumulate_matches, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & mask, bool accumulate_matches, at::Tensor & out);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm.h
new file mode 100644
index 0000000000000000000000000000000000000000..2435d5c7acd6d4d73250a4822a54fd9e0a0278ec
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm.h
@@ -0,0 +1,36 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+
+#include
+
+namespace at {
+
+
+// aten::_sparse_mm(Tensor sparse, Tensor dense) -> Tensor
+inline at::Tensor _sparse_mm(const at::Tensor & sparse, const at::Tensor & dense) {
+    return at::_ops::_sparse_mm::call(sparse, dense);
+}
+
+// aten::_sparse_mm.reduce(Tensor sparse, Tensor dense, str reduce) -> Tensor
+inline at::Tensor _sparse_mm(const at::Tensor & sparse, const at::Tensor & dense, c10::string_view reduce) {
+    return at::_ops::_sparse_mm_reduce::call(sparse, dense, reduce);
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_compositeimplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..545999861de0bb8eb6c06dba3831eea7e894184d
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_compositeimplicitautograd_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
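A short illustration of the two `_sparse_mm` entry points declared above. The tensor contents are placeholders, and the `.reduce` overload is shown on a CSR conversion because the reduce kernels later in this diff are `_sparse_csr_cpu` implementations:

```cpp
#include <ATen/ATen.h>

void sparse_mm_demo() {
  // A 2x2 COO matrix with two non-zeros; indices form a 2 x nnz Long tensor.
  at::Tensor indices = at::tensor({0, 1, 1, 0}, at::kLong).view({2, 2});
  at::Tensor values  = at::tensor({2.0f, 3.0f});
  at::Tensor sparse  = at::sparse_coo_tensor(indices, values, {2, 2});
  at::Tensor dense   = at::rand({2, 4});

  at::Tensor plain   = at::_sparse_mm(sparse, dense);                        // aten::_sparse_mm
  at::Tensor reduced = at::_sparse_mm(sparse.to_sparse_csr(), dense, "sum"); // .reduce overload
  (void)plain; (void)reduced;
}
```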
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _sparse_mm(const at::Tensor & sparse, const at::Tensor & dense); +TORCH_API at::Tensor _sparse_mm(const at::Tensor & sparse, const at::Tensor & dense, c10::string_view reduce); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_native.h new file mode 100644 index 0000000000000000000000000000000000000000..9eddbcd9d2e52abe8e297e1e9c5013145715d5ac --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _sparse_mm(const at::Tensor & sparse, const at::Tensor & dense); +TORCH_API at::Tensor _sparse_mm(const at::Tensor & sparse, const at::Tensor & dense, c10::string_view reduce); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..3db521e269a735fd1248a4bfa699abcc55a0a156 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
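Every generated `_ops` struct (such as `_sparse_mm` in the header that follows) exposes `call()`, which enters dispatch from the top, and `redispatch()`, which re-enters below keys already handled. The key-set arithmetic below is an illustrative sketch of the common wrapper-kernel pattern, not code from this diff:

```cpp
#include <ATen/ATen.h>
#include <ATen/ops/_sparse_mm_ops.h>

// Strip the keys already consumed (everything up to and including
// autograd) before re-entering dispatch for the wrapped op.
at::Tensor wrapped_sparse_mm(c10::DispatchKeySet ks, const at::Tensor& sparse,
                             const at::Tensor& dense) {
  return at::_ops::_sparse_mm::redispatch(ks & c10::after_autograd_keyset,
                                          sparse, dense);
}
```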
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_mm { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_mm"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_mm(Tensor sparse, Tensor dense) -> Tensor"; + static at::Tensor call(const at::Tensor & sparse, const at::Tensor & dense); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & sparse, const at::Tensor & dense); +}; + +struct TORCH_API _sparse_mm_reduce { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, c10::string_view); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_mm"; + static constexpr const char* overload_name = "reduce"; + static constexpr const char* schema_str = "_sparse_mm.reduce(Tensor sparse, Tensor dense, str reduce) -> Tensor"; + static at::Tensor call(const at::Tensor & sparse, const at::Tensor & dense, c10::string_view reduce); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & sparse, const at::Tensor & dense, c10::string_view reduce); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..01ae6e8d9baaaa95893978dcf773fe5a5a116c9f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_mm_reduce_impl(Tensor self, Tensor other, str reduce) -> (Tensor, Tensor) +inline ::std::tuple _sparse_mm_reduce_impl(const at::Tensor & self, const at::Tensor & other, c10::string_view reduce) { + return at::_ops::_sparse_mm_reduce_impl::call(self, other, reduce); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl_backward.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..46501245a0150bd117f4d5e6e7221e6dfff364d1 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl_backward.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_mm_reduce_impl_backward(Tensor self, Tensor grad_out, Tensor weight, str reduce, Tensor arg_out, bool[2] output_mask) -> (Tensor, Tensor) +inline ::std::tuple _sparse_mm_reduce_impl_backward(const at::Tensor & self, const at::Tensor & grad_out, const at::Tensor & weight, c10::string_view reduce, const at::Tensor & arg_out, ::std::array output_mask) { + return at::_ops::_sparse_mm_reduce_impl_backward::call(self, grad_out, weight, reduce, arg_out, output_mask); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl_backward_native.h 
b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..65e3df2e0ba83048d4f1ce90ed084ec3008d1e65 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl_backward_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _sparse_mm_reduce_impl_backward_sparse_csr_cpu(const at::Tensor & self, const at::Tensor & grad_out, const at::Tensor & weight, c10::string_view reduce, const at::Tensor & arg_out, ::std::array output_mask); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl_backward_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..a5e19c6a0ef249140876ac672afaa2ee6aa10fc7 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl_backward_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_mm_reduce_impl_backward { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, c10::string_view, const at::Tensor &, ::std::array); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_mm_reduce_impl_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_mm_reduce_impl_backward(Tensor self, Tensor grad_out, Tensor weight, str reduce, Tensor arg_out, bool[2] output_mask) -> (Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & self, const at::Tensor & grad_out, const at::Tensor & weight, c10::string_view reduce, const at::Tensor & arg_out, ::std::array output_mask); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & grad_out, const at::Tensor & weight, c10::string_view reduce, const at::Tensor & arg_out, ::std::array output_mask); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl_native.h new file mode 100644 index 0000000000000000000000000000000000000000..270960770241c0ebfeecb52a7366b99797b9de66 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _sparse_mm_reduce_impl_sparse_csr_cpu(const at::Tensor & self, const at::Tensor & other, c10::string_view reduce); +} // namespace native +} // namespace at diff --git 
a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..556634f2119d23d03a85313193fa74becab91d6f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_mm_reduce_impl { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, c10::string_view); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_mm_reduce_impl"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_mm_reduce_impl(Tensor self, Tensor other, str reduce) -> (Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & self, const at::Tensor & other, c10::string_view reduce); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other, c10::string_view reduce); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_addmm.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_addmm.h new file mode 100644 index 0000000000000000000000000000000000000000..d7f7e9a263d53a68e3a3f4ff33d6f8dc32176cbc --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_addmm.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_semi_structured_addmm(Tensor input, Tensor mat1, Tensor mat1_meta, Tensor mat2, *, Scalar alpha=1, Scalar beta=1, ScalarType? out_dtype=None) -> Tensor +inline at::Tensor _sparse_semi_structured_addmm(const at::Tensor & input, const at::Tensor & mat1, const at::Tensor & mat1_meta, const at::Tensor & mat2, const at::Scalar & alpha=1, const at::Scalar & beta=1, ::std::optional out_dtype=::std::nullopt) { + return at::_ops::_sparse_semi_structured_addmm::call(input, mat1, mat1_meta, mat2, alpha, beta, out_dtype); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_addmm_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_addmm_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..e06719f6a51f3299163501513809e5caf890bad2 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_addmm_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor _sparse_semi_structured_addmm(const at::Tensor & input, const at::Tensor & mat1, const at::Tensor & mat1_meta, const at::Tensor & mat2, const at::Scalar & alpha=1, const at::Scalar & beta=1, ::std::optional<at::ScalarType> out_dtype=::std::nullopt);
+
+} // namespace cuda
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_addmm_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_addmm_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..1d463f7b7f1df8452b454df7afb4249198529268
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_addmm_native.h
@@ -0,0 +1,21 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor _sparse_semi_structured_addmm(const at::Tensor & input, const at::Tensor & mat1, const at::Tensor & mat1_meta, const at::Tensor & mat2, const at::Scalar & alpha=1, const at::Scalar & beta=1, ::std::optional<at::ScalarType> out_dtype=::std::nullopt);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_addmm_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_addmm_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..310c5e93b708912a8344b3dc8dc1163e6ba246ed
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_addmm_ops.h
@@ -0,0 +1,29 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _sparse_semi_structured_addmm {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Scalar &, const at::Scalar &, ::std::optional<at::ScalarType>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_sparse_semi_structured_addmm";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_sparse_semi_structured_addmm(Tensor input, Tensor mat1, Tensor mat1_meta, Tensor mat2, *, Scalar alpha=1, Scalar beta=1, ScalarType? out_dtype=None) -> Tensor";
+  static at::Tensor call(const at::Tensor & input, const at::Tensor & mat1, const at::Tensor & mat1_meta, const at::Tensor & mat2, const at::Scalar & alpha, const at::Scalar & beta, ::std::optional<at::ScalarType> out_dtype);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & mat1, const at::Tensor & mat1_meta, const at::Tensor & mat2, const at::Scalar & alpha, const at::Scalar & beta, ::std::optional<at::ScalarType> out_dtype);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply.h
new file mode 100644
index 0000000000000000000000000000000000000000..d0544681ab1d21482ade368b884342a62d2c1202
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply.h
@@ -0,0 +1,31 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+
+#include
+
+namespace at {
+
+
+// aten::_sparse_semi_structured_apply(Tensor input, Tensor thread_masks) -> (Tensor, Tensor)
+inline ::std::tuple<at::Tensor,at::Tensor> _sparse_semi_structured_apply(const at::Tensor & input, const at::Tensor & thread_masks) {
+    return at::_ops::_sparse_semi_structured_apply::call(input, thread_masks);
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..c91ce1c738adb31bca625200e3e2637d9dc7e38c
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_cuda_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
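Per the `cuda` dispatch header above, `_sparse_semi_structured_addmm` is CUDA-only; `mat1`/`mat1_meta` would come from a prior 2:4 sparsification step, so this is a signature sketch rather than a runnable recipe:

```cpp
#include <ATen/ATen.h>
#include <optional>

// All four tensors are expected on a CUDA device; alpha/beta mirror addmm.
at::Tensor ss_addmm_demo(const at::Tensor& input, const at::Tensor& mat1,
                         const at::Tensor& mat1_meta, const at::Tensor& mat2) {
  return at::_sparse_semi_structured_addmm(input, mat1, mat1_meta, mat2,
                                           /*alpha=*/1, /*beta=*/1,
                                           /*out_dtype=*/std::nullopt);
}
```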
+#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple _sparse_semi_structured_apply(const at::Tensor & input, const at::Tensor & thread_masks); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_dense.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_dense.h new file mode 100644 index 0000000000000000000000000000000000000000..63162c150b71ee17a84b51bb86a39f0751716fec --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_dense.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_semi_structured_apply_dense(Tensor input, Tensor thread_masks) -> Tensor +inline at::Tensor _sparse_semi_structured_apply_dense(const at::Tensor & input, const at::Tensor & thread_masks) { + return at::_ops::_sparse_semi_structured_apply_dense::call(input, thread_masks); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_dense_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_dense_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ce50e3e4e4059cee088d90ea1f7cd98cf4e790ef --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_dense_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
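The apply entry point declared above returns a `(Tensor, Tensor)` pair, which unpacks naturally with structured bindings. Illustrative only:

```cpp
#include <ATen/ATen.h>

void apply_demo(const at::Tensor& input, const at::Tensor& thread_masks) {
  // The two outputs: packed values and the accompanying metadata tensor.
  auto [packed, meta] = at::_sparse_semi_structured_apply(input, thread_masks);
  (void)packed;
  (void)meta;
}
```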
+#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _sparse_semi_structured_apply_dense(const at::Tensor & input, const at::Tensor & thread_masks); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_dense_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_dense_native.h new file mode 100644 index 0000000000000000000000000000000000000000..b57d1c8322ba5ef01a0fdab881f47eedd9bda7df --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_dense_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _sparse_semi_structured_apply_dense(const at::Tensor & input, const at::Tensor & thread_masks); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_dense_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_dense_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..ad7c8accb63a221aaa6909016f721e8655ffa8ad --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_dense_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_semi_structured_apply_dense { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_semi_structured_apply_dense"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_semi_structured_apply_dense(Tensor input, Tensor thread_masks) -> Tensor"; + static at::Tensor call(const at::Tensor & input, const at::Tensor & thread_masks); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & thread_masks); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_native.h new file mode 100644 index 0000000000000000000000000000000000000000..940e22c25d8b173e9dc2941c065e85f6a7f8a583 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _sparse_semi_structured_apply(const at::Tensor & input, const at::Tensor & thread_masks); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..9cdb82e4b20f5c8f555b25c265434ebea007930d --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
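Counterpart sketch for `_sparse_semi_structured_apply_dense`, which, going by its schema, applies the same thread masks but returns a single dense tensor instead of a packed pair (an informal reading, not official documentation):

```cpp
#include <ATen/ATen.h>

at::Tensor apply_dense_demo(const at::Tensor& input,
                            const at::Tensor& thread_masks) {
  // Same inputs as apply(), but a single dense result.
  return at::_sparse_semi_structured_apply_dense(input, thread_masks);
}
```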
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_semi_structured_apply { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_semi_structured_apply"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_semi_structured_apply(Tensor input, Tensor thread_masks) -> (Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & input, const at::Tensor & thread_masks); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & thread_masks); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_linear.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_linear.h new file mode 100644 index 0000000000000000000000000000000000000000..8718b06f703c8d8d4e406f2ca4adb512d5e9f369 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_linear.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None, ScalarType? out_dtype=None) -> Tensor +inline at::Tensor _sparse_semi_structured_linear(const at::Tensor & input, const at::Tensor & weight, const at::Tensor & meta, const ::std::optional & bias={}, ::std::optional activation=::std::nullopt, ::std::optional out_dtype=::std::nullopt) { + return at::_ops::_sparse_semi_structured_linear::call(input, weight, meta, bias, activation, out_dtype); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_linear_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_linear_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..a2f519a5e9149c9666397d3bdaa68e80b17ae916 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_linear_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
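A hedged usage sketch of `_sparse_semi_structured_linear` as declared above; `bias` and `activation` are optional, and the "relu" string here is only an example value:

```cpp
#include <ATen/ATen.h>
#include <optional>

at::Tensor ss_linear_demo(const at::Tensor& input, const at::Tensor& weight,
                          const at::Tensor& meta, const at::Tensor& bias) {
  return at::_sparse_semi_structured_linear(input, weight, meta, bias,
                                            /*activation=*/"relu",
                                            /*out_dtype=*/std::nullopt);
}
```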
+#include
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor _sparse_semi_structured_linear(const at::Tensor & input, const at::Tensor & weight, const at::Tensor & meta, const ::std::optional<at::Tensor> & bias={}, ::std::optional<c10::string_view> activation=::std::nullopt, ::std::optional<at::ScalarType> out_dtype=::std::nullopt);
+
+} // namespace cuda
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_linear_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_linear_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..671c1e33c71d98bd968be4e8eadd418acea1f628
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_linear_native.h
@@ -0,0 +1,21 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor _sparse_semi_structured_linear(const at::Tensor & input, const at::Tensor & weight, const at::Tensor & meta, const ::std::optional<at::Tensor> & bias={}, ::std::optional<c10::string_view> activation=::std::nullopt, ::std::optional<at::ScalarType> out_dtype=::std::nullopt);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_linear_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_linear_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..6d793c782ce73003b14ac2fac72b8e5ee3c49e75
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_linear_ops.h
@@ -0,0 +1,29 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _sparse_semi_structured_linear {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, const ::std::optional<at::Tensor> &, ::std::optional<c10::string_view>, ::std::optional<at::ScalarType>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_sparse_semi_structured_linear";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None, ScalarType? out_dtype=None) -> Tensor";
+  static at::Tensor call(const at::Tensor & input, const at::Tensor & weight, const at::Tensor & meta, const ::std::optional<at::Tensor> & bias, ::std::optional<c10::string_view> activation, ::std::optional<at::ScalarType> out_dtype);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & weight, const at::Tensor & meta, const ::std::optional<at::Tensor> & bias, ::std::optional<c10::string_view> activation, ::std::optional<at::ScalarType> out_dtype);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_mm.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_mm.h
new file mode 100644
index 0000000000000000000000000000000000000000..491a2ee67539fd109fea9fd9632bd445bc4d0aaa
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_mm.h
@@ -0,0 +1,31 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+
+#include
+
+namespace at {
+
+
+// aten::_sparse_semi_structured_mm(Tensor mat1, Tensor mat1_meta, Tensor mat2, *, ScalarType? out_dtype=None) -> Tensor
+inline at::Tensor _sparse_semi_structured_mm(const at::Tensor & mat1, const at::Tensor & mat1_meta, const at::Tensor & mat2, ::std::optional<at::ScalarType> out_dtype=::std::nullopt) {
+    return at::_ops::_sparse_semi_structured_mm::call(mat1, mat1_meta, mat2, out_dtype);
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_mm_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_mm_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..3a1266c5c7d2597957af698694bb3bf64f5dc17b
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_mm_cuda_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
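Sketch of the mm variant declared above, which omits the accumulator input; the optional `out_dtype` allows, for example, accumulating a half-precision product into float:

```cpp
#include <ATen/ATen.h>

at::Tensor ss_mm_demo(const at::Tensor& mat1, const at::Tensor& mat1_meta,
                      const at::Tensor& mat2) {
  // Accumulate the (typically fp16/bf16) product into float.
  return at::_sparse_semi_structured_mm(mat1, mat1_meta, mat2,
                                        /*out_dtype=*/at::kFloat);
}
```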
+#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _sparse_semi_structured_mm(const at::Tensor & mat1, const at::Tensor & mat1_meta, const at::Tensor & mat2, ::std::optional out_dtype=::std::nullopt); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_mm_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_mm_native.h new file mode 100644 index 0000000000000000000000000000000000000000..2482e5405e8b2f9b5a3ca27da1d7942dfdae4d0d --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_mm_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _sparse_semi_structured_mm(const at::Tensor & mat1, const at::Tensor & mat1_meta, const at::Tensor & mat2, ::std::optional out_dtype=::std::nullopt); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_mm_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_mm_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..40b00d4886a0de78c16a242de7bca5756be52493 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_mm_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_semi_structured_mm { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_semi_structured_mm"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_semi_structured_mm(Tensor mat1, Tensor mat1_meta, Tensor mat2, *, ScalarType? 
out_dtype=None) -> Tensor"; + static at::Tensor call(const at::Tensor & mat1, const at::Tensor & mat1_meta, const at::Tensor & mat2, ::std::optional out_dtype); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & mat1, const at::Tensor & mat1_meta, const at::Tensor & mat2, ::std::optional out_dtype); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile.h new file mode 100644 index 0000000000000000000000000000000000000000..fb15f0b5de071045bf4ed5dd55d5fef854ec2d2a --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_semi_structured_tile(Tensor input, str algorithm="", bool use_cutlass=True) -> (Tensor, Tensor, Tensor, Tensor, Tensor) +inline ::std::tuple _sparse_semi_structured_tile(const at::Tensor & input, c10::string_view algorithm="", bool use_cutlass=true) { + return at::_ops::_sparse_semi_structured_tile::call(input, algorithm, use_cutlass); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..c1206ed07a860ed8ad4b6d0565f2547c3ba16aeb --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
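A short usage sketch (not part of the diff) of the inline wrapper shown above, assuming mat1 and mat1_meta are the compressed 2:4-sparse operand and its metadata while mat2 is dense:

#include <ATen/ATen.h>

// out_dtype stays at its None default, so the result keeps the input dtype.
at::Tensor semi_structured_mm(const at::Tensor& mat1,
                              const at::Tensor& mat1_meta,
                              const at::Tensor& mat2) {
  return at::_sparse_semi_structured_mm(mat1, mat1_meta, mat2);
}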
+#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple _sparse_semi_structured_tile(const at::Tensor & input, c10::string_view algorithm="", bool use_cutlass=true); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile_native.h new file mode 100644 index 0000000000000000000000000000000000000000..440ebe7de7cdc68516ee4ae8db02441c5bd6108c --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _sparse_semi_structured_tile(const at::Tensor & input, c10::string_view algorithm="", bool use_cutlass=true); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..727d173df0ae717df707db01acb22e59c9dd8e32 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_semi_structured_tile { + using schema = ::std::tuple (const at::Tensor &, c10::string_view, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_semi_structured_tile"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_semi_structured_tile(Tensor input, str algorithm=\"\", bool use_cutlass=True) -> (Tensor, Tensor, Tensor, Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & input, c10::string_view algorithm, bool use_cutlass); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, c10::string_view algorithm, bool use_cutlass); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax.h new file mode 100644 index 0000000000000000000000000000000000000000..91f1b4158b67279942d463ea17a38d839c911e5e --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax.h @@ -0,0 +1,50 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_softmax.int(Tensor self, int dim, ScalarType? 
dtype=None) -> Tensor +inline at::Tensor _sparse_softmax(const at::Tensor & self, int64_t dim, ::std::optional dtype=::std::nullopt) { + return at::_ops::_sparse_softmax_int::call(self, dim, dtype); +} + +// aten::_sparse_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor _sparse_softmax(const at::Tensor & self, at::Dimname dim, ::std::optional dtype=::std::nullopt) { + return at::_ops::_sparse_softmax_Dimname::call(self, dim, dtype); +} + +// aten::_sparse_softmax(Tensor self, int dim, bool half_to_float) -> Tensor +inline at::Tensor _sparse_softmax(const at::Tensor & self, int64_t dim, bool half_to_float) { + return at::_ops::_sparse_softmax::call(self, dim, half_to_float); +} + +// aten::_sparse_softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _sparse_softmax_out(at::Tensor & out, const at::Tensor & self, int64_t dim, bool half_to_float) { + return at::_ops::_sparse_softmax_out::call(self, dim, half_to_float, out); +} +// aten::_sparse_softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _sparse_softmax_outf(const at::Tensor & self, int64_t dim, bool half_to_float, at::Tensor & out) { + return at::_ops::_sparse_softmax_out::call(self, dim, half_to_float, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_backward_data.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_backward_data.h new file mode 100644 index 0000000000000000000000000000000000000000..6c7e299b59395e06ecd4a7b895a1d24afbc19be2 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_backward_data.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor +inline at::Tensor _sparse_softmax_backward_data(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self) { + return at::_ops::_sparse_softmax_backward_data::call(grad_output, output, dim, self); +} + +// aten::_sparse_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _sparse_softmax_backward_data_out(at::Tensor & out, const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self) { + return at::_ops::_sparse_softmax_backward_data_out::call(grad_output, output, dim, self, out); +} +// aten::_sparse_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, Tensor self, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & _sparse_softmax_backward_data_outf(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self, at::Tensor & out) { + return at::_ops::_sparse_softmax_backward_data_out::call(grad_output, output, dim, self, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_backward_data_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_backward_data_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..31f4dbaba7e171c17d5dc810d58e2510e3b81326 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_backward_data_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _sparse_softmax_backward_data_out(at::Tensor & out, const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self); +TORCH_API at::Tensor & _sparse_softmax_backward_data_outf(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_backward_data_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_backward_data_native.h new file mode 100644 index 0000000000000000000000000000000000000000..90c2aa546c8380ab9d84f3c3bb64e91e2b6524bc --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_backward_data_native.h @@ -0,0 +1,23 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _sparse_softmax_backward_data_out(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor softmax_backward_sparse_cpu(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self); +TORCH_API at::Tensor softmax_backward_sparse_cuda(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_backward_data_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_backward_data_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..a105fbbd401a23fce4719fd793730b99d39005dc --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_backward_data_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_softmax_backward_data { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, int64_t, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_softmax_backward_data"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self); +}; + +struct TORCH_API _sparse_softmax_backward_data_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, int64_t, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_softmax_backward_data"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_sparse_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..2d7aaa373a67fab9ebd1c20436e33f70b5a90210 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
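As a rough sketch (not from the diff): the four _sparse_softmax entry points above split into two user-facing overloads taking an optional dtype and a dispatcher-facing (dim, half_to_float) pair; the usual public route is softmax on a sparse tensor. Assuming x is a sparse COO tensor in a libtorch build:

#include <ATen/ATen.h>

void sparse_softmax_examples(const at::Tensor& x) {
  // .int overload: optional accumulation dtype (here float).
  at::Tensor a = at::_sparse_softmax(x, /*dim=*/0, at::kFloat);
  // Dispatcher-facing overload used by the CPU/CUDA kernels.
  at::Tensor b = at::_sparse_softmax(x, /*dim=*/0, /*half_to_float=*/false);
}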
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _sparse_softmax_out(at::Tensor & out, const at::Tensor & self, int64_t dim, bool half_to_float); +TORCH_API at::Tensor & _sparse_softmax_outf(const at::Tensor & self, int64_t dim, bool half_to_float, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b64bd043f7932316d35fa75fb7a657cf9d5d0fc7 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_compositeimplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _sparse_softmax(const at::Tensor & self, int64_t dim, ::std::optional<at::ScalarType> dtype=::std::nullopt); +TORCH_API at::Tensor _sparse_softmax(const at::Tensor & self, at::Dimname dim, ::std::optional<at::ScalarType> dtype=::std::nullopt); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_native.h new file mode 100644 index 0000000000000000000000000000000000000000..94450d91e0b6bd99d1c4f622ddedb29bc1700d4f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_native.h @@ -0,0 +1,25 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _sparse_softmax(const at::Tensor & self, int64_t dim, ::std::optional<at::ScalarType> dtype=::std::nullopt); +TORCH_API at::Tensor _sparse_softmax(const at::Tensor & self, at::Dimname dim, ::std::optional<at::ScalarType> dtype=::std::nullopt); +TORCH_API at::Tensor & _sparse_softmax_out(const at::Tensor & self, int64_t dim, bool half_to_float, at::Tensor & out); +TORCH_API at::Tensor softmax_sparse_cpu(const at::Tensor & self, int64_t dim, bool half_to_float); +TORCH_API at::Tensor softmax_sparse_cuda(const at::Tensor & self, int64_t dim, bool half_to_float); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..a3e576fcc569f736cf110a0baf53c3095ed5cb2e --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_softmax_ops.h @@ -0,0 +1,62 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_softmax_int { + using schema = at::Tensor (const at::Tensor &, int64_t, ::std::optional<at::ScalarType>); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_softmax"; + static constexpr const char* overload_name = "int"; + static constexpr const char* schema_str = "_sparse_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, int64_t dim, ::std::optional<at::ScalarType> dtype); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t dim, ::std::optional<at::ScalarType> dtype); +}; + +struct TORCH_API _sparse_softmax_Dimname { + using schema = at::Tensor (const at::Tensor &, at::Dimname, ::std::optional<at::ScalarType>); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_softmax"; + static constexpr const char* overload_name = "Dimname"; + static constexpr const char* schema_str = "_sparse_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, at::Dimname dim, ::std::optional<at::ScalarType> dtype); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Dimname dim, ::std::optional<at::ScalarType> dtype); +}; + +struct TORCH_API _sparse_softmax { + using schema = at::Tensor (const at::Tensor &, int64_t, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_softmax"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_softmax(Tensor self, int dim, bool half_to_float) -> Tensor"; + static at::Tensor call(const at::Tensor & self, int64_t dim, bool half_to_float); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t dim, bool half_to_float); +}; + +struct TORCH_API _sparse_softmax_out { + using schema = at::Tensor & (const at::Tensor &, int64_t, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_softmax"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_sparse_softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, int64_t dim, bool half_to_float, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t dim, bool half_to_float, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sparse_matmul.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sparse_matmul.h new file mode 100644 index 0000000000000000000000000000000000000000..822a091085ee421de302c881ed23a7dd1424ebb0 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sparse_matmul.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_sparse_matmul(Tensor self, Tensor other) -> Tensor +inline at::Tensor _sparse_sparse_matmul(const at::Tensor & self, const at::Tensor & other) { + return at::_ops::_sparse_sparse_matmul::call(self, other); +} + +// aten::_sparse_sparse_matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _sparse_sparse_matmul_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other) { + return at::_ops::_sparse_sparse_matmul_out::call(self, other, out); +} +// aten::_sparse_sparse_matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _sparse_sparse_matmul_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out) { + return at::_ops::_sparse_sparse_matmul_out::call(self, other, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sparse_matmul_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sparse_matmul_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..0e3c77e025c8edfe3ff96eb08e52f91f490ab4ac --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sparse_matmul_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
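A hedged sketch of the functional form declared above; both operands are assumed to be 2-D sparse COO tensors, and the public entry point for this kernel is matmul on two sparse tensors:

#include <ATen/ATen.h>

// Sparse @ sparse -> sparse; a and b are assumed to be 2-D sparse COO.
at::Tensor spspmm(const at::Tensor& a, const at::Tensor& b) {
  return at::_sparse_sparse_matmul(a, b);
}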
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _sparse_sparse_matmul_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & _sparse_sparse_matmul_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sparse_matmul_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sparse_matmul_native.h new file mode 100644 index 0000000000000000000000000000000000000000..a927512267a630c2badbc814fac28c8fe4c46829 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sparse_matmul_native.h @@ -0,0 +1,23 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _sparse_sparse_matmul_out(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +TORCH_API at::Tensor sparse_sparse_matmul_cpu(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor sparse_sparse_matmul_cuda(const at::Tensor & self, const at::Tensor & other); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sparse_matmul_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sparse_matmul_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..06d2d79bfc275efab53594af63b4bbca3324112c --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sparse_matmul_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_sparse_matmul { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_sparse_matmul"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_sparse_matmul(Tensor self, Tensor other) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & other); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other); +}; + +struct TORCH_API _sparse_sparse_matmul_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_sparse_matmul"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_sparse_sparse_matmul.out(Tensor self, Tensor other, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum.h new file mode 100644 index 0000000000000000000000000000000000000000..9cae94903502ce3db9a81d57a65e79246dce0467 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum.h @@ -0,0 +1,55 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_sum(Tensor self) -> Tensor +inline at::Tensor _sparse_sum(const at::Tensor & self) { + return at::_ops::_sparse_sum::call(self); +} + +// aten::_sparse_sum.dtype(Tensor self, *, ScalarType dtype) -> Tensor +inline at::Tensor _sparse_sum(const at::Tensor & self, at::ScalarType dtype) { + return at::_ops::_sparse_sum_dtype::call(self, dtype); +} + +// aten::_sparse_sum.dim(Tensor self, int[1] dim) -> Tensor +inline at::Tensor _sparse_sum(const at::Tensor & self, at::IntArrayRef dim) { + return at::_ops::_sparse_sum_dim::call(self, dim); +} + +// aten::_sparse_sum.dim_dtype(Tensor self, int[1] dim, *, ScalarType dtype) -> Tensor +inline at::Tensor _sparse_sum(const at::Tensor & self, at::IntArrayRef dim, at::ScalarType dtype) { + return at::_ops::_sparse_sum_dim_dtype::call(self, dim, dtype); +} + +// aten::_sparse_sum.dim_out(Tensor self, int[1] dim, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _sparse_sum_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef dim) { + return at::_ops::_sparse_sum_dim_out::call(self, dim, out); +} +// aten::_sparse_sum.dim_out(Tensor self, int[1] dim, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _sparse_sum_outf(const at::Tensor & self, at::IntArrayRef dim, at::Tensor & out) { + return at::_ops::_sparse_sum_dim_out::call(self, dim, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_backward.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..e547ec974cf2d1c6315e30357bb0e7c2d8c6dd5a --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_backward.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_sum_backward(Tensor grad, Tensor self, int[] dim) -> Tensor +inline at::Tensor _sparse_sum_backward(const at::Tensor & grad, const at::Tensor & self, at::IntArrayRef dim) { + return at::_ops::_sparse_sum_backward::call(grad, self, dim); +} + +// aten::_sparse_sum_backward.out(Tensor grad, Tensor self, int[] dim, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _sparse_sum_backward_out(at::Tensor & out, const at::Tensor & grad, const at::Tensor & self, at::IntArrayRef dim) { + return at::_ops::_sparse_sum_backward_out::call(grad, self, dim, out); +} +// aten::_sparse_sum_backward.out(Tensor grad, Tensor self, int[] dim, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & _sparse_sum_backward_outf(const at::Tensor & grad, const at::Tensor & self, at::IntArrayRef dim, at::Tensor & out) { + return at::_ops::_sparse_sum_backward_out::call(grad, self, dim, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_backward_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_backward_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..97156fd46fbe1a0f83ae33a0415492e60a21669d --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_backward_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _sparse_sum_backward_out(at::Tensor & out, const at::Tensor & grad, const at::Tensor & self, at::IntArrayRef dim); +TORCH_API at::Tensor & _sparse_sum_backward_outf(const at::Tensor & grad, const at::Tensor & self, at::IntArrayRef dim, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_backward_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..141e42e3c2b7c8d1db1e0e1f0035601ea0ae9246 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_backward_native.h @@ -0,0 +1,23 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _sparse_sum_backward_out(const at::Tensor & grad, const at::Tensor & self, at::IntArrayRef dim, at::Tensor & out); +TORCH_API at::Tensor _sparse_sum_backward_cpu(const at::Tensor & grad, const at::Tensor & self, at::IntArrayRef dim); +TORCH_API at::Tensor _sparse_sum_backward_cuda(const at::Tensor & grad, const at::Tensor & self, at::IntArrayRef dim); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_backward_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..d36440dc505105b7d402e27679cedee21cf42d22 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_backward_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
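The _sparse_sum overloads above mirror the plain, .dim, and .dim_dtype schema variants; a minimal sketch, assuming x is a sparse COO tensor:

#include <ATen/ATen.h>

void sparse_sum_examples(const at::Tensor& x) {
  at::Tensor all  = at::_sparse_sum(x);                    // sum of all values
  at::Tensor dim0 = at::_sparse_sum(x, {0});               // .dim overload
  at::Tensor dd   = at::_sparse_sum(x, {0}, at::kDouble);  // .dim_dtype overload
}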
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_sum_backward { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, at::IntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_sum_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_sum_backward(Tensor grad, Tensor self, int[] dim) -> Tensor"; + static at::Tensor call(const at::Tensor & grad, const at::Tensor & self, at::IntArrayRef dim); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad, const at::Tensor & self, at::IntArrayRef dim); +}; + +struct TORCH_API _sparse_sum_backward_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, at::IntArrayRef, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_sum_backward"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_sparse_sum_backward.out(Tensor grad, Tensor self, int[] dim, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & grad, const at::Tensor & self, at::IntArrayRef dim, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad, const at::Tensor & self, at::IntArrayRef dim, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..240162a12de325a780d145bc54ba1c33cff5779f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_compositeexplicitautograd_dispatch.h @@ -0,0 +1,25 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor _sparse_sum(const at::Tensor & self, at::IntArrayRef dim); +TORCH_API at::Tensor & _sparse_sum_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef dim); +TORCH_API at::Tensor & _sparse_sum_outf(const at::Tensor & self, at::IntArrayRef dim, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..088b11a3985bc086dce3f0cf34a65c04c8e91d90 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_compositeimplicitautograd_dispatch.h @@ -0,0 +1,25 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _sparse_sum(const at::Tensor & self); +TORCH_API at::Tensor _sparse_sum(const at::Tensor & self, at::ScalarType dtype); +TORCH_API at::Tensor _sparse_sum(const at::Tensor & self, at::IntArrayRef dim, at::ScalarType dtype); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_native.h new file mode 100644 index 0000000000000000000000000000000000000000..39562a5fa655737aa70830a8b34773e15e70a3b6 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_native.h @@ -0,0 +1,25 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _sparse_sum(const at::Tensor & self); +TORCH_API at::Tensor _sparse_sum(const at::Tensor & self, at::ScalarType dtype); +TORCH_API at::Tensor _sparse_sum(const at::Tensor & self, at::IntArrayRef dim); +TORCH_API at::Tensor & _sparse_sum_dim_out(const at::Tensor & self, at::IntArrayRef dim, at::Tensor & out); +TORCH_API at::Tensor _sparse_sum(const at::Tensor & self, at::IntArrayRef dim, at::ScalarType dtype); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..0772bd2752b7f5a4b43e1377212ff4d3b9465f4d --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_sparse_sum_ops.h @@ -0,0 +1,73 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_sum { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_sum"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_sum(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API _sparse_sum_dtype { + using schema = at::Tensor (const at::Tensor &, at::ScalarType); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_sum"; + static constexpr const char* overload_name = "dtype"; + static constexpr const char* schema_str = "_sparse_sum.dtype(Tensor self, *, ScalarType dtype) -> Tensor"; + static at::Tensor call(const at::Tensor & self, at::ScalarType dtype); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::ScalarType dtype); +}; + +struct TORCH_API _sparse_sum_dim { + using schema = at::Tensor (const at::Tensor &, at::IntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_sum"; + static constexpr const char* overload_name = "dim"; + static constexpr const char* schema_str = "_sparse_sum.dim(Tensor self, int[1] dim) -> Tensor"; + static at::Tensor call(const at::Tensor & self, at::IntArrayRef dim); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef dim); +}; + +struct TORCH_API _sparse_sum_dim_dtype { + using schema = at::Tensor (const at::Tensor &, at::IntArrayRef, at::ScalarType); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_sum"; + static constexpr const char* overload_name = "dim_dtype"; + static constexpr const char* schema_str = "_sparse_sum.dim_dtype(Tensor self, int[1] dim, *, ScalarType dtype) -> Tensor"; + static at::Tensor call(const at::Tensor & self, at::IntArrayRef dim, at::ScalarType dtype); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef dim, at::ScalarType dtype); +}; + +struct TORCH_API _sparse_sum_dim_out { + using schema = at::Tensor & (const at::Tensor &, at::IntArrayRef, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_sum"; + static constexpr const char* overload_name = "dim_out"; + static constexpr const char* schema_str = "_sparse_sum.dim_out(Tensor self, int[1] dim, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::IntArrayRef dim, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef dim, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_spdiags.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_spdiags.h new file mode 100644 index 0000000000000000000000000000000000000000..0869266c86034d53a4a3655ad53fca380a10fff2 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_spdiags.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_spdiags(Tensor diagonals, Tensor offsets, int[] shape, Layout? layout=None) -> Tensor +inline at::Tensor _spdiags(const at::Tensor & diagonals, const at::Tensor & offsets, at::IntArrayRef shape, ::std::optional<at::Layout> layout=::std::nullopt) { + return at::_ops::_spdiags::call(diagonals, offsets, shape, layout); +} + +// aten::_spdiags.out(Tensor diagonals, Tensor offsets, int[] shape, Layout? layout=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _spdiags_out(at::Tensor & out, const at::Tensor & diagonals, const at::Tensor & offsets, at::IntArrayRef shape, ::std::optional<at::Layout> layout=::std::nullopt) { + return at::_ops::_spdiags_out::call(diagonals, offsets, shape, layout, out); +} +// aten::_spdiags.out(Tensor diagonals, Tensor offsets, int[] shape, Layout? layout=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _spdiags_outf(const at::Tensor & diagonals, const at::Tensor & offsets, at::IntArrayRef shape, ::std::optional<at::Layout> layout, at::Tensor & out) { + return at::_ops::_spdiags_out::call(diagonals, offsets, shape, layout, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_spdiags_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_spdiags_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..2cacee4e3553ec33de92556c8d71c8a46c77cdeb --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_spdiags_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _spdiags_out(at::Tensor & out, const at::Tensor & diagonals, const at::Tensor & offsets, at::IntArrayRef shape, ::std::optional<at::Layout> layout=::std::nullopt); +TORCH_API at::Tensor & _spdiags_outf(const at::Tensor & diagonals, const at::Tensor & offsets, at::IntArrayRef shape, ::std::optional<at::Layout> layout, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_spdiags_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_spdiags_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..76a2e099dabfe78140615ad80da81817ff1ad8e6 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_spdiags_cpu_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _spdiags(const at::Tensor & diagonals, const at::Tensor & offsets, at::IntArrayRef shape, ::std::optional<at::Layout> layout=::std::nullopt); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_spdiags_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_spdiags_native.h new file mode 100644 index 0000000000000000000000000000000000000000..6ffe9dc67d574788eec4cc220eeb94bc0e3b1967 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_spdiags_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _spdiags_out(const at::Tensor & diagonals, const at::Tensor & offsets, at::IntArrayRef shape, ::std::optional<at::Layout> layout, at::Tensor & out); +TORCH_API at::Tensor spdiags(const at::Tensor & diagonals, const at::Tensor & offsets, at::IntArrayRef shape, ::std::optional<at::Layout> layout=::std::nullopt); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_spdiags_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_spdiags_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..46759bdeb41022a01b574d766def64d88c413b48 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_spdiags_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _spdiags { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, at::IntArrayRef, ::std::optional<at::Layout>); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_spdiags"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_spdiags(Tensor diagonals, Tensor offsets, int[] shape, Layout? layout=None) -> Tensor"; + static at::Tensor call(const at::Tensor & diagonals, const at::Tensor & offsets, at::IntArrayRef shape, ::std::optional<at::Layout> layout); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & diagonals, const at::Tensor & offsets, at::IntArrayRef shape, ::std::optional<at::Layout> layout); +}; + +struct TORCH_API _spdiags_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, at::IntArrayRef, ::std::optional<at::Layout>, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_spdiags"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_spdiags.out(Tensor diagonals, Tensor offsets, int[] shape, Layout? layout=None, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & diagonals, const at::Tensor & offsets, at::IntArrayRef shape, ::std::optional<at::Layout> layout, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & diagonals, const at::Tensor & offsets, at::IntArrayRef shape, ::std::optional<at::Layout> layout, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_spsolve.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_spsolve.h new file mode 100644 index 0000000000000000000000000000000000000000..56ecdd9fd3ad4703177ae2e5b3db0c4c035ea74e --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_spsolve.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_spsolve(Tensor A, Tensor B, *, bool left=True) -> Tensor +inline at::Tensor _spsolve(const at::Tensor & A, const at::Tensor & B, bool left=true) { + return at::_ops::_spsolve::call(A, B, left); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_spsolve_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_spsolve_native.h new file mode 100644 index 0000000000000000000000000000000000000000..25b2601afe6c0ee3510ce82079bdc69976aaf797 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_spsolve_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _sparse_csr_linear_solve(const at::Tensor & A, const at::Tensor & B, bool left=true); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_spsolve_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_spsolve_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..d07ef6cda7bca30f28b586139f2a66413b9ba9b2 --- /dev/null +++ 
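To make the _spdiags schema above concrete, a hedged sketch building a small sparse matrix from two diagonals, in the spirit of scipy.sparse.spdiags; torch::tensor is used here only to fabricate example inputs:

#include <torch/torch.h>

at::Tensor spdiags_example() {
  // Two diagonals of length 3: the main diagonal (offset 0) and the first
  // superdiagonal (offset 1), placed into a 3 x 3 matrix; layout keeps its
  // None default.
  at::Tensor diagonals = torch::tensor({{1, 2, 3}, {4, 5, 6}});
  at::Tensor offsets   = torch::tensor({0, 1});
  return at::_spdiags(diagonals, offsets, /*shape=*/{3, 3});
}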
b/phivenv/Lib/site-packages/torch/include/ATen/ops/_spsolve_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _spsolve { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_spsolve"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_spsolve(Tensor A, Tensor B, *, bool left=True) -> Tensor"; + static at::Tensor call(const at::Tensor & A, const at::Tensor & B, bool left); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & A, const at::Tensor & B, bool left); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_stack.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_stack.h new file mode 100644 index 0000000000000000000000000000000000000000..e39ec89ed1ceaed0b464cf0a99f10fd651aa3e84 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_stack.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_stack(Tensor[] tensors, int dim=0) -> Tensor +inline at::Tensor _stack(at::TensorList tensors, int64_t dim=0) { + return at::_ops::_stack::call(tensors, dim); +} + +// aten::_stack.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _stack_out(at::Tensor & out, at::TensorList tensors, int64_t dim=0) { + return at::_ops::_stack_out::call(tensors, dim, out); +} +// aten::_stack.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _stack_outf(at::TensorList tensors, int64_t dim, at::Tensor & out) { + return at::_ops::_stack_out::call(tensors, dim, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_stack_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_stack_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..98165529e74dadd837edff5ed293ae79b3aacada --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_stack_compositeexplicitautograd_dispatch.h @@ -0,0 +1,25 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor _stack(at::TensorList tensors, int64_t dim=0); +TORCH_API at::Tensor & _stack_out(at::Tensor & out, at::TensorList tensors, int64_t dim=0); +TORCH_API at::Tensor & _stack_outf(at::TensorList tensors, int64_t dim, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_stack_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_stack_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..bcccf71d8a8359bc339e8a9e2c4e3b3800146a52 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_stack_cpu_dispatch.h @@ -0,0 +1,25 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _stack(at::TensorList tensors, int64_t dim=0); +TORCH_API at::Tensor & _stack_out(at::Tensor & out, at::TensorList tensors, int64_t dim=0); +TORCH_API at::Tensor & _stack_outf(at::TensorList tensors, int64_t dim, at::Tensor & out); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_stack_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_stack_native.h new file mode 100644 index 0000000000000000000000000000000000000000..c1762b0d6bc8f52fffbe36e76a159f51b2c63c35 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_stack_native.h @@ -0,0 +1,24 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _stack(at::TensorList tensors, int64_t dim=0); +TORCH_API at::Tensor & _stack_out(at::TensorList tensors, int64_t dim, at::Tensor & out); +TORCH_API at::Tensor _stack_cpu(at::TensorList tensors, int64_t dim=0); +TORCH_API at::Tensor & _stack_out_cpu(at::TensorList tensors, int64_t dim, at::Tensor & out); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_stack_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_stack_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..e354410c010dd0cd9f1804a7c409946a52ead4ab --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_stack_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _stack { + using schema = at::Tensor (at::TensorList, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_stack"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_stack(Tensor[] tensors, int dim=0) -> Tensor"; + static at::Tensor call(at::TensorList tensors, int64_t dim); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList tensors, int64_t dim); +}; + +struct TORCH_API _stack_out { + using schema = at::Tensor & (at::TensorList, int64_t, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_stack"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_stack.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(at::TensorList tensors, int64_t dim, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList tensors, int64_t dim, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma.h new file mode 100644 index 0000000000000000000000000000000000000000..77f4986c793c36caad7f4406c21296efad01e0fc --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_standard_gamma(Tensor self, Generator? generator=None) -> Tensor +inline at::Tensor _standard_gamma(const at::Tensor & self, ::std::optional<at::Generator> generator=::std::nullopt) { + return at::_ops::_standard_gamma::call(self, generator); +} + +// aten::_standard_gamma.out(Tensor self, Generator? generator=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _standard_gamma_out(at::Tensor & out, const at::Tensor & self, ::std::optional<at::Generator> generator=::std::nullopt) { + return at::_ops::_standard_gamma_out::call(self, generator, out); +} +// aten::_standard_gamma.out(Tensor self, Generator? generator=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _standard_gamma_outf(const at::Tensor & self, ::std::optional<at::Generator> generator, at::Tensor & out) { + return at::_ops::_standard_gamma_out::call(self, generator, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f88e50795c463ff18199d16adc184121927617c3 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _standard_gamma_out(at::Tensor & out, const at::Tensor & self, ::std::optional<at::Generator> generator=::std::nullopt); +TORCH_API at::Tensor & _standard_gamma_outf(const at::Tensor & self, ::std::optional<at::Generator> generator, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..895c8338095061835f1effa1c5aca1f9e0a51f4f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_cpu_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _standard_gamma(const at::Tensor & self, ::std::optional<at::Generator> generator=::std::nullopt); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ea4017990dce5396c5bb4a8ea54433280d63dedd --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _standard_gamma(const at::Tensor & self, ::std::optional<at::Generator> generator=::std::nullopt); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_grad.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_grad.h new file mode 100644 index 0000000000000000000000000000000000000000..4633248b4f2f3642859ab4ceabaffff006f29dee --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_grad.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_standard_gamma_grad(Tensor self, Tensor output) -> Tensor +inline at::Tensor _standard_gamma_grad(const at::Tensor & self, const at::Tensor & output) { + return at::_ops::_standard_gamma_grad::call(self, output); +} + +// aten::_standard_gamma_grad.out(Tensor self, Tensor output, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _standard_gamma_grad_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & output) { + return at::_ops::_standard_gamma_grad_out::call(self, output, out); +} +// aten::_standard_gamma_grad.out(Tensor self, Tensor output, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _standard_gamma_grad_outf(const at::Tensor & self, const at::Tensor & output, at::Tensor & out) { + return at::_ops::_standard_gamma_grad_out::call(self, output, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_grad_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_grad_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..c89e2f7bb320d82d2741461b1f9ceaae062bd318 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_grad_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _standard_gamma_grad_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & output); +TORCH_API at::Tensor & _standard_gamma_grad_outf(const at::Tensor & self, const at::Tensor & output, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_grad_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_grad_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..4d46d81afc7cea905a5edea83f44962fc0b6ce77 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_grad_cpu_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _standard_gamma_grad(const at::Tensor & self, const at::Tensor & output); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_grad_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_grad_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..474728c818ee562ca510efc5b72cf7f600cd021f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_grad_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _standard_gamma_grad(const at::Tensor & self, const at::Tensor & output); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_grad_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_grad_native.h new file mode 100644 index 0000000000000000000000000000000000000000..022380cd6144894694adb12e6218f157d6bb04f3 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_grad_native.h @@ -0,0 +1,23 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _standard_gamma_grad_out(const at::Tensor & self, const at::Tensor & output, at::Tensor & out); +TORCH_API at::Tensor _standard_gamma_grad_cpu(const at::Tensor & self, const at::Tensor & output); +TORCH_API at::Tensor _standard_gamma_grad_cuda(const at::Tensor & self, const at::Tensor & output); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_grad_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_grad_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..1712b24fe34acf1c0853e8a3812d78605394c36e --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_grad_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _standard_gamma_grad { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_standard_gamma_grad"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_standard_gamma_grad(Tensor self, Tensor output) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & output); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & output); +}; + +struct TORCH_API _standard_gamma_grad_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_standard_gamma_grad"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_standard_gamma_grad.out(Tensor self, Tensor output, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & output, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & output, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_native.h new file mode 100644 index 0000000000000000000000000000000000000000..025775992fcc76f7ea552d89b600810290c60125 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_native.h @@ -0,0 +1,23 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _standard_gamma_out(const at::Tensor & self, ::std::optional<at::Generator> generator, at::Tensor & out); +TORCH_API at::Tensor _s_gamma_cpu(const at::Tensor & self, ::std::optional<at::Generator> generator=::std::nullopt); +TORCH_API at::Tensor _s_gamma_cuda(const at::Tensor & self, ::std::optional<at::Generator> generator=::std::nullopt); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..30d797286fbd0095904fe208af4cefe954fccfa2 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_standard_gamma_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _standard_gamma { + using schema = at::Tensor (const at::Tensor &, ::std::optional<at::Generator>); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_standard_gamma"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_standard_gamma(Tensor self, Generator? generator=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, ::std::optional<at::Generator> generator); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional<at::Generator> generator); +}; + +struct TORCH_API _standard_gamma_out { + using schema = at::Tensor & (const at::Tensor &, ::std::optional<at::Generator>, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_standard_gamma"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_standard_gamma.out(Tensor self, Generator? generator=None, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, ::std::optional<at::Generator> generator, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional<at::Generator> generator, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_ambiguous_defaults.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_ambiguous_defaults.h new file mode 100644 index 0000000000000000000000000000000000000000..bcc7e94d1fb177fb8a3e5a0634a7ef05c2a27b98 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_ambiguous_defaults.h @@ -0,0 +1,36 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_test_ambiguous_defaults.a(Tensor dummy, int a=1, int b=1) -> Tensor
inline at::Tensor _test_ambiguous_defaults(const at::Tensor & dummy, int64_t a=1, int64_t b=1) { + return at::_ops::_test_ambiguous_defaults_a::call(dummy, a, b); +} + +// aten::_test_ambiguous_defaults.b(Tensor dummy, int a=2, str b="2") -> Tensor +inline at::Tensor _test_ambiguous_defaults(const at::Tensor & dummy, int64_t a, c10::string_view b) { + return at::_ops::_test_ambiguous_defaults_b::call(dummy, a, b); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_ambiguous_defaults_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_ambiguous_defaults_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..90e75a1d160a156a16f00ebfaaf9f4def027e000 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_ambiguous_defaults_compositeimplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _test_ambiguous_defaults(const at::Tensor & dummy, int64_t a=1, int64_t b=1); +TORCH_API at::Tensor _test_ambiguous_defaults(const at::Tensor & dummy, int64_t a, c10::string_view b); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_ambiguous_defaults_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_ambiguous_defaults_native.h new file mode 100644 index 0000000000000000000000000000000000000000..78a23c2ca60dd5582dbc059d64bdc93f8c7dc593 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_ambiguous_defaults_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _test_ambiguous_defaults(const at::Tensor & dummy, int64_t a=1, int64_t b=1); +TORCH_API at::Tensor _test_ambiguous_defaults(const at::Tensor & dummy, int64_t a=2, c10::string_view b="2"); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_ambiguous_defaults_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_ambiguous_defaults_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..db01186d229aabcdea40b1a8492fbfe2f356135f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_ambiguous_defaults_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _test_ambiguous_defaults_a { + using schema = at::Tensor (const at::Tensor &, int64_t, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_ambiguous_defaults"; + static constexpr const char* overload_name = "a"; + static constexpr const char* schema_str = "_test_ambiguous_defaults.a(Tensor dummy, int a=1, int b=1) -> Tensor"; + static at::Tensor call(const at::Tensor & dummy, int64_t a, int64_t b); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & dummy, int64_t a, int64_t b); +}; + +struct TORCH_API _test_ambiguous_defaults_b { + using schema = at::Tensor (const at::Tensor &, int64_t, c10::string_view); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_ambiguous_defaults"; + static constexpr const char* overload_name = "b"; + static constexpr const char* schema_str = "_test_ambiguous_defaults.b(Tensor dummy, int a=2, str b=\"2\") -> Tensor"; + static at::Tensor call(const at::Tensor & dummy, int64_t a, c10::string_view b); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & dummy, int64_t a, c10::string_view b); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..6ca296734e889118376e303e1d600d0a3652f90d --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch.h @@ -0,0 +1,45 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_test_autograd_multiple_dispatch.fullcoverage(Tensor self) -> Tensor +inline at::Tensor _test_autograd_multiple_dispatch(const at::Tensor & self) { + return at::_ops::_test_autograd_multiple_dispatch_fullcoverage::call(self); +} + +// aten::_test_autograd_multiple_dispatch.ntonly(Tensor self, bool b) -> Tensor +inline at::Tensor _test_autograd_multiple_dispatch(const at::Tensor & self, bool b) { + return at::_ops::_test_autograd_multiple_dispatch_ntonly::call(self, b); +} + +// aten::_test_autograd_multiple_dispatch.fullcoverage_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _test_autograd_multiple_dispatch_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::_test_autograd_multiple_dispatch_fullcoverage_out::call(self, out); +} +// aten::_test_autograd_multiple_dispatch.fullcoverage_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & _test_autograd_multiple_dispatch_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::_test_autograd_multiple_dispatch_fullcoverage_out::call(self, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d31d90dd93566527a2da6a27387ac3e49c11bc75 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_compositeexplicitautograd_dispatch.h @@ -0,0 +1,25 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor _test_autograd_multiple_dispatch(const at::Tensor & self); +TORCH_API at::Tensor & _test_autograd_multiple_dispatch_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & _test_autograd_multiple_dispatch_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..9fb5d9f63261c46f3e6b847c89bd48727fdca363 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_compositeimplicitautograd_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _test_autograd_multiple_dispatch(const at::Tensor & self, bool b); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_native.h new file mode 100644 index 0000000000000000000000000000000000000000..b42808d91f65ef928018b87644b0ca2f7ce7b78f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_native.h @@ -0,0 +1,23 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _test_autograd_multiple_dispatch_fullcoverage(const at::Tensor & self); +TORCH_API at::Tensor & _test_autograd_multiple_dispatch_fullcoverage_out(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor _test_autograd_multiple_dispatch_ntonly(const at::Tensor & self, bool b); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..6dafea9a0252b918ed5fe6615111d1c1cffda898 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_ops.h @@ -0,0 +1,51 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _test_autograd_multiple_dispatch_fullcoverage { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_autograd_multiple_dispatch"; + static constexpr const char* overload_name = "fullcoverage"; + static constexpr const char* schema_str = "_test_autograd_multiple_dispatch.fullcoverage(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API _test_autograd_multiple_dispatch_ntonly { + using schema = at::Tensor (const at::Tensor &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_autograd_multiple_dispatch"; + static constexpr const char* overload_name = "ntonly"; + static constexpr const char* schema_str = "_test_autograd_multiple_dispatch.ntonly(Tensor self, bool b) -> Tensor"; + static at::Tensor call(const at::Tensor & self, bool b); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, bool b); +}; + +struct TORCH_API _test_autograd_multiple_dispatch_fullcoverage_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_autograd_multiple_dispatch"; + static constexpr const char* overload_name = "fullcoverage_out"; + static constexpr const char* schema_str = "_test_autograd_multiple_dispatch.fullcoverage_out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view.h new file mode 100644 index 0000000000000000000000000000000000000000..abc7d0989203151db9e501c80cb0795b3331d222 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_test_autograd_multiple_dispatch_view(Tensor(a) self) -> Tensor(a) +inline at::Tensor _test_autograd_multiple_dispatch_view(const at::Tensor & self) { + return at::_ops::_test_autograd_multiple_dispatch_view::call(self); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..97ac4c5c5ef9d1cf8e80ea23e6d261a5a493c601 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_compositeexplicitautograd_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor _test_autograd_multiple_dispatch_view(const at::Tensor & self); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_copy.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_copy.h new file mode 100644 index 0000000000000000000000000000000000000000..96e51543c36a70716f4b6d59b45380915253028c --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_copy.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_test_autograd_multiple_dispatch_view_copy(Tensor self) -> Tensor +inline at::Tensor _test_autograd_multiple_dispatch_view_copy(const at::Tensor & self) { + return at::_ops::_test_autograd_multiple_dispatch_view_copy::call(self); +} + +// aten::_test_autograd_multiple_dispatch_view_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & _test_autograd_multiple_dispatch_view_copy_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::_test_autograd_multiple_dispatch_view_copy_out::call(self, out); +} +// aten::_test_autograd_multiple_dispatch_view_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _test_autograd_multiple_dispatch_view_copy_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::_test_autograd_multiple_dispatch_view_copy_out::call(self, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_copy_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_copy_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..da48f64b99bc6fb360f58db2ba5c76961ae2034d --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_copy_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _test_autograd_multiple_dispatch_view_copy_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & _test_autograd_multiple_dispatch_view_copy_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_copy_compositeexplicitautogradnonfunctional_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_copy_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..92d6b6fc2eb68f7fae9fea7e1022d0a0a3326fb8 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_copy_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor _test_autograd_multiple_dispatch_view_copy(const at::Tensor & self); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_copy_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_copy_native.h new file mode 100644 index 0000000000000000000000000000000000000000..a3129614f6c84590baf599f4132f03c200455fbd --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_copy_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _test_autograd_multiple_dispatch_view_copy_out(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor _test_autograd_multiple_dispatch_view_copy(const at::Tensor & self); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_copy_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_copy_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..e64425009d83389002f47d25be833007387248de --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_copy_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _test_autograd_multiple_dispatch_view_copy { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_autograd_multiple_dispatch_view_copy"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_test_autograd_multiple_dispatch_view_copy(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API _test_autograd_multiple_dispatch_view_copy_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_autograd_multiple_dispatch_view_copy"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_test_autograd_multiple_dispatch_view_copy.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_native.h new file mode 100644 index 0000000000000000000000000000000000000000..4b1c2bc1f369c6ff0b72b82d63e6b46d7d7a8573 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _test_autograd_multiple_dispatch_view(const at::Tensor & self); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..0ddfbcf6266b4726ef522cafb79c3f4167da5f61 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _test_autograd_multiple_dispatch_view { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_autograd_multiple_dispatch_view"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_test_autograd_multiple_dispatch_view(Tensor(a) self) -> Tensor(a)"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_check_tensor.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_check_tensor.h new file mode 100644 index 0000000000000000000000000000000000000000..e413c5fd1e57f0b02ff10b3fed44d6679c0428e7 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_check_tensor.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_test_check_tensor(Tensor self) -> Tensor +inline at::Tensor _test_check_tensor(const at::Tensor & self) { + return at::_ops::_test_check_tensor::call(self); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_check_tensor_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_check_tensor_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..60119e291eba62596f0cffb052b1b1ca16fc3571 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_check_tensor_compositeimplicitautograd_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _test_check_tensor(const at::Tensor & self); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_check_tensor_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_check_tensor_native.h new file mode 100644 index 0000000000000000000000000000000000000000..03109a5d1515dc741c3893b280e8cdf827b50235 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_check_tensor_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _test_check_tensor(const at::Tensor & self); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_check_tensor_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_check_tensor_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..9fd6511bf5a7dcd2576c323d7c85ca3b3a5bd08f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_check_tensor_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _test_check_tensor { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_check_tensor"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_test_check_tensor(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_functorch_fallback.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_functorch_fallback.h new file mode 100644 index 0000000000000000000000000000000000000000..2deb5e95372a08385dbc507b056d6028cb6da8f9 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_functorch_fallback.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_test_functorch_fallback(Tensor self, Tensor other) -> Tensor +inline at::Tensor _test_functorch_fallback(const at::Tensor & self, const at::Tensor & other) { + return at::_ops::_test_functorch_fallback::call(self, other); +} + +// aten::_test_functorch_fallback.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _test_functorch_fallback_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other) { + return at::_ops::_test_functorch_fallback_out::call(self, other, out); +} +// aten::_test_functorch_fallback.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & _test_functorch_fallback_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out) { + return at::_ops::_test_functorch_fallback_out::call(self, other, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_functorch_fallback_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_functorch_fallback_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..9e8e6d9a5e10ca6dd38b8804a9de0ceef1e9556b --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_functorch_fallback_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _test_functorch_fallback_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & _test_functorch_fallback_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_functorch_fallback_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_functorch_fallback_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..02c1caf759056a5c1643a30d944a6f3487bc2902 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_functorch_fallback_cpu_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _test_functorch_fallback(const at::Tensor & self, const at::Tensor & other); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_functorch_fallback_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_functorch_fallback_native.h new file mode 100644 index 0000000000000000000000000000000000000000..088586c716224875cdc90cc12070d719f156e6e3 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_functorch_fallback_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _test_functorch_fallback_out(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +TORCH_API at::Tensor _test_functorch_fallback(const at::Tensor & self, const at::Tensor & other); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_functorch_fallback_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_functorch_fallback_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..f7850b8698cb2a4e756070eabd40e07006c04588 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_functorch_fallback_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _test_functorch_fallback { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_functorch_fallback"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_test_functorch_fallback(Tensor self, Tensor other) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & other); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other); +}; + +struct TORCH_API _test_functorch_fallback_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_functorch_fallback"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_test_functorch_fallback.out(Tensor self, Tensor other, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_filled_intlist.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_filled_intlist.h new file mode 100644 index 0000000000000000000000000000000000000000..78f9752b111c9cbfaf700c07b4e224a149646b9b --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_filled_intlist.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_test_optional_filled_intlist(Tensor values, int[2]? addends) -> Tensor +inline at::Tensor _test_optional_filled_intlist(const at::Tensor & values, at::OptionalIntArrayRef addends) { + return at::_ops::_test_optional_filled_intlist::call(values, addends); +} + +// aten::_test_optional_filled_intlist.out(Tensor values, int[2]? addends, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _test_optional_filled_intlist_out(at::Tensor & out, const at::Tensor & values, at::OptionalIntArrayRef addends) { + return at::_ops::_test_optional_filled_intlist_out::call(values, addends, out); +} +// aten::_test_optional_filled_intlist.out(Tensor values, int[2]? addends, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _test_optional_filled_intlist_outf(const at::Tensor & values, at::OptionalIntArrayRef addends, at::Tensor & out) { + return at::_ops::_test_optional_filled_intlist_out::call(values, addends, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_filled_intlist_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_filled_intlist_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..2bba0b3bec8f2f84e95ef91c976197e2a4bea6d9 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_filled_intlist_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _test_optional_filled_intlist_out(at::Tensor & out, const at::Tensor & values, at::OptionalIntArrayRef addends); +TORCH_API at::Tensor & _test_optional_filled_intlist_outf(const at::Tensor & values, at::OptionalIntArrayRef addends, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_filled_intlist_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_filled_intlist_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3497ac1fbfae0c5d0fba8a9bd17fb4ddea8f58e8 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_filled_intlist_cpu_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _test_optional_filled_intlist(const at::Tensor & values, at::OptionalIntArrayRef addends); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_filled_intlist_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_filled_intlist_native.h new file mode 100644 index 0000000000000000000000000000000000000000..cb98a0daccf6fb79ac55f7a06ea62b68af18cfb3 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_filled_intlist_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _test_optional_filled_intlist_out(const at::Tensor & values, at::OptionalIntArrayRef addends, at::Tensor & out); +TORCH_API at::Tensor _test_optional_intlist(const at::Tensor & values, at::OptionalIntArrayRef addends); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_filled_intlist_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_filled_intlist_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..164a825ef7d156a605c7592df886b09a172434cd --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_filled_intlist_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _test_optional_filled_intlist { + using schema = at::Tensor (const at::Tensor &, at::OptionalIntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_optional_filled_intlist"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_test_optional_filled_intlist(Tensor values, int[2]? addends) -> Tensor"; + static at::Tensor call(const at::Tensor & values, at::OptionalIntArrayRef addends); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & values, at::OptionalIntArrayRef addends); +}; + +struct TORCH_API _test_optional_filled_intlist_out { + using schema = at::Tensor & (const at::Tensor &, at::OptionalIntArrayRef, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_optional_filled_intlist"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_test_optional_filled_intlist.out(Tensor values, int[2]? addends, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & values, at::OptionalIntArrayRef addends, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & values, at::OptionalIntArrayRef addends, at::Tensor & out); +}; + +}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_floatlist.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_floatlist.h new file mode 100644 index 0000000000000000000000000000000000000000..552083f755a2c9727cc517313b10abc01c93d749 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_floatlist.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_test_optional_floatlist(Tensor values, float[]? addends) -> Tensor +inline at::Tensor _test_optional_floatlist(const at::Tensor & values, ::std::optional<at::ArrayRef<double>> addends) { + return at::_ops::_test_optional_floatlist::call(values, addends); +} + +// aten::_test_optional_floatlist.out(Tensor values, float[]? addends, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _test_optional_floatlist_out(at::Tensor & out, const at::Tensor & values, ::std::optional<at::ArrayRef<double>> addends) { + return at::_ops::_test_optional_floatlist_out::call(values, addends, out); +} +// aten::_test_optional_floatlist.out(Tensor values, float[]? addends, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _test_optional_floatlist_outf(const at::Tensor & values, ::std::optional<at::ArrayRef<double>> addends, at::Tensor & out) { + return at::_ops::_test_optional_floatlist_out::call(values, addends, out); +} + +}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_floatlist_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_floatlist_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..fc669be412e43640c119da9db760a09428571436 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_floatlist_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _test_optional_floatlist_out(at::Tensor & out, const at::Tensor & values, ::std::optional<at::ArrayRef<double>> addends); +TORCH_API at::Tensor & _test_optional_floatlist_outf(const at::Tensor & values, ::std::optional<at::ArrayRef<double>> addends, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_floatlist_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_floatlist_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..fe3f01a82c005118c4f89cb315623824a8870380 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_floatlist_cpu_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _test_optional_floatlist(const at::Tensor & values, ::std::optional<at::ArrayRef<double>> addends); + +} // namespace cpu +} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_floatlist_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_floatlist_native.h new file mode 100644 index 0000000000000000000000000000000000000000..8916691acf80b3310aa687aa22a8ec819cdaa3f7 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_floatlist_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _test_optional_floatlist_out(const at::Tensor & values, ::std::optional<at::ArrayRef<double>> addends, at::Tensor & out); +TORCH_API at::Tensor _test_optional_floatlist(const at::Tensor & values, ::std::optional<at::ArrayRef<double>> addends); +} // namespace native +} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_floatlist_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_floatlist_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..ecb45824a31d479c87a4e5edf56a5e396672955f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_floatlist_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _test_optional_floatlist { + using schema = at::Tensor (const at::Tensor &, ::std::optional<at::ArrayRef<double>>); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_optional_floatlist"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_test_optional_floatlist(Tensor values, float[]? addends) -> Tensor"; + static at::Tensor call(const at::Tensor & values, ::std::optional<at::ArrayRef<double>> addends); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & values, ::std::optional<at::ArrayRef<double>> addends); +}; + +struct TORCH_API _test_optional_floatlist_out { + using schema = at::Tensor & (const at::Tensor &, ::std::optional<at::ArrayRef<double>>, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_optional_floatlist"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_test_optional_floatlist.out(Tensor values, float[]? addends, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & values, ::std::optional<at::ArrayRef<double>> addends, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & values, ::std::optional<at::ArrayRef<double>> addends, at::Tensor & out); +}; + +}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_intlist.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_intlist.h new file mode 100644 index 0000000000000000000000000000000000000000..b0fef4ff910371a1db3739286fba32f7f554706c --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_intlist.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_test_optional_intlist(Tensor values, int[]? addends) -> Tensor +inline at::Tensor _test_optional_intlist(const at::Tensor & values, at::OptionalIntArrayRef addends) { + return at::_ops::_test_optional_intlist::call(values, addends); +} + +// aten::_test_optional_intlist.out(Tensor values, int[]? addends, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _test_optional_intlist_out(at::Tensor & out, const at::Tensor & values, at::OptionalIntArrayRef addends) { + return at::_ops::_test_optional_intlist_out::call(values, addends, out); +} +// aten::_test_optional_intlist.out(Tensor values, int[]? addends, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _test_optional_intlist_outf(const at::Tensor & values, at::OptionalIntArrayRef addends, at::Tensor & out) { + return at::_ops::_test_optional_intlist_out::call(values, addends, out); +} + +}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_intlist_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_intlist_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..00d43e9495c08322900500d62c9bac6769e2579a --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_intlist_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class.
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _test_optional_intlist_out(at::Tensor & out, const at::Tensor & values, at::OptionalIntArrayRef addends); +TORCH_API at::Tensor & _test_optional_intlist_outf(const at::Tensor & values, at::OptionalIntArrayRef addends, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_intlist_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_intlist_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..687ede1d5ef1e24162ad663b8cd3e71a52394aff --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_intlist_cpu_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _test_optional_intlist(const at::Tensor & values, at::OptionalIntArrayRef addends); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_intlist_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_intlist_native.h new file mode 100644 index 0000000000000000000000000000000000000000..61821a12c94a351517b3ce9c15042b2db47e180a --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_intlist_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _test_optional_intlist_out(const at::Tensor & values, at::OptionalIntArrayRef addends, at::Tensor & out); +TORCH_API at::Tensor _test_optional_intlist(const at::Tensor & values, at::OptionalIntArrayRef addends); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_intlist_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_intlist_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..352f25fc9fd4250e0757965540689042002279d3 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_optional_intlist_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _test_optional_intlist { + using schema = at::Tensor (const at::Tensor &, at::OptionalIntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_optional_intlist"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_test_optional_intlist(Tensor values, int[]? addends) -> Tensor"; + static at::Tensor call(const at::Tensor & values, at::OptionalIntArrayRef addends); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & values, at::OptionalIntArrayRef addends); +}; + +struct TORCH_API _test_optional_intlist_out { + using schema = at::Tensor & (const at::Tensor &, at::OptionalIntArrayRef, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_optional_intlist"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_test_optional_intlist.out(Tensor values, int[]? addends, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & values, at::OptionalIntArrayRef addends, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & values, at::OptionalIntArrayRef addends, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_parallel_materialize.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_parallel_materialize.h new file mode 100644 index 0000000000000000000000000000000000000000..e99657a2a937b651ce97e3f5502a4e756d6509bd --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_parallel_materialize.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_test_parallel_materialize(Tensor self, int num_parallel, bool skip_first=False) -> Tensor +inline at::Tensor _test_parallel_materialize(const at::Tensor & self, int64_t num_parallel, bool skip_first=false) { + return at::_ops::_test_parallel_materialize::call(self, num_parallel, skip_first); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_parallel_materialize_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_parallel_materialize_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d8142e102b7d5b33d7bb64e353d6d5913dad8246 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_parallel_materialize_compositeexplicitautograd_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor _test_parallel_materialize(const at::Tensor & self, int64_t num_parallel, bool skip_first=false); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_parallel_materialize_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_parallel_materialize_native.h new file mode 100644 index 0000000000000000000000000000000000000000..e3a2dc80a983c3d067bce0d4d4a7d552ddaa8d87 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_parallel_materialize_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _test_parallel_materialize(const at::Tensor & self, int64_t num_parallel, bool skip_first=false); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_parallel_materialize_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_parallel_materialize_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..29eae20b86463acd0a50d526d92b422d3bcd723c --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_parallel_materialize_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _test_parallel_materialize { + using schema = at::Tensor (const at::Tensor &, int64_t, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_parallel_materialize"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_test_parallel_materialize(Tensor self, int num_parallel, bool skip_first=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, int64_t num_parallel, bool skip_first); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t num_parallel, bool skip_first); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_serialization_subcmul.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_serialization_subcmul.h new file mode 100644 index 0000000000000000000000000000000000000000..4a35d200c203280e5a2ea8f33f642f00245e953f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_serialization_subcmul.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_test_serialization_subcmul(Tensor self, Tensor other, Scalar alpha=1) -> Tensor +inline at::Tensor _test_serialization_subcmul(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha=1) { + return at::_ops::_test_serialization_subcmul::call(self, other, alpha); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_serialization_subcmul_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_serialization_subcmul_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..a0fe3f6d2d6deb326aa86fbc9e4a98878abd21da --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_serialization_subcmul_compositeimplicitautograd_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _test_serialization_subcmul(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha=1); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_serialization_subcmul_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_serialization_subcmul_native.h new file mode 100644 index 0000000000000000000000000000000000000000..098182cbb33c15620e3e1340f2d77d9879b5e5a4 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_serialization_subcmul_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _test_serialization_subcmul(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha=1); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_serialization_subcmul_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_serialization_subcmul_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..ee5405374c99868acb97cb935884cdf07677b59c --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_serialization_subcmul_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
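Note the smaller header set for `_test_serialization_subcmul` relative to the ops above: it ships only compositeimplicitautograd, native, and ops headers, with no cpu/cuda dispatch headers and no out variant, because a CompositeImplicitAutograd op decomposes into other ATen ops and needs no per-backend kernels. A hedged usage sketch (illustrative names only) showing the defaulted Scalar argument visible in the header:

    #include <ATen/ATen.h>

    at::Tensor subcmul_sketch(const at::Tensor& a, const at::Tensor& b) {
      at::Tensor x = at::_test_serialization_subcmul(a, b);       // alpha defaults to 1
      at::Tensor y = at::_test_serialization_subcmul(a, b, 2.0);  // at::Scalar converts from double
      return x + y;
    }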
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _test_serialization_subcmul { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Scalar &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_serialization_subcmul"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_test_serialization_subcmul(Tensor self, Tensor other, Scalar alpha=1) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_string_default.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_string_default.h new file mode 100644 index 0000000000000000000000000000000000000000..a73dbf5c87f743d8d4472c2b68c5c91ad4427943 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_string_default.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_test_string_default(Tensor dummy, str a="\"'\\", str b='"\'\\') -> Tensor +inline at::Tensor _test_string_default(const at::Tensor & dummy, c10::string_view a="\"'\\", c10::string_view b="\"'\\") { + return at::_ops::_test_string_default::call(dummy, a, b); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_string_default_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_string_default_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..bf369fbc6fed44ef602f6a1c873c870727d78325 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_string_default_compositeimplicitautograd_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _test_string_default(const at::Tensor & dummy, c10::string_view a="\"'\\", c10::string_view b="\"'\\"); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_string_default_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_string_default_native.h new file mode 100644 index 0000000000000000000000000000000000000000..23fed36e010bdee7e279a1ec0b3240949bda818d --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_string_default_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _test_string_default(const at::Tensor & dummy, c10::string_view a="\"'\\", c10::string_view b="\"'\\"); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_string_default_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_string_default_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..5cdb492480185f50c47f6676afd6443c5b310e3b --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_string_default_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _test_string_default { + using schema = at::Tensor (const at::Tensor &, c10::string_view, c10::string_view); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_string_default"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_test_string_default(Tensor dummy, str a=\"\\\"'\\\\\", str b='\"\\'\\\\') -> Tensor"; + static at::Tensor call(const at::Tensor & dummy, c10::string_view a, c10::string_view b); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & dummy, c10::string_view a, c10::string_view b); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_warn_in_autograd.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_warn_in_autograd.h new file mode 100644 index 0000000000000000000000000000000000000000..0272d24a309ef74dd3231ca4e1b655fef5c92375 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_warn_in_autograd.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_test_warn_in_autograd(Tensor self) -> Tensor +inline at::Tensor _test_warn_in_autograd(const at::Tensor & self) { + return at::_ops::_test_warn_in_autograd::call(self); +} + +// aten::_test_warn_in_autograd.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & _test_warn_in_autograd_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::_test_warn_in_autograd_out::call(self, out); +} +// aten::_test_warn_in_autograd.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _test_warn_in_autograd_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::_test_warn_in_autograd_out::call(self, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_warn_in_autograd_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_warn_in_autograd_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..517d805e3fd3c13dc86761a2e58862020bb0efc9 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_warn_in_autograd_compositeexplicitautograd_dispatch.h @@ -0,0 +1,25 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor _test_warn_in_autograd(const at::Tensor & self); +TORCH_API at::Tensor & _test_warn_in_autograd_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & _test_warn_in_autograd_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_warn_in_autograd_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_warn_in_autograd_native.h new file mode 100644 index 0000000000000000000000000000000000000000..8dbe4805b9e39404b6267b0f6291778b24197207 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_warn_in_autograd_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _test_warn_in_autograd(const at::Tensor & self); +TORCH_API at::Tensor & _test_warn_in_autograd_out(const at::Tensor & self, at::Tensor & out); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_warn_in_autograd_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_warn_in_autograd_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..1a990c1b627cd9054ae992a77f000737db93757a --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_test_warn_in_autograd_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _test_warn_in_autograd { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_warn_in_autograd"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_test_warn_in_autograd(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API _test_warn_in_autograd_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_warn_in_autograd"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_test_warn_in_autograd.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_gru_cell_backward.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_gru_cell_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..c8da92f71f55d5fda803d4f65ace32071cf0605c --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_gru_cell_backward.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_thnn_differentiable_gru_cell_backward(Tensor grad_hy, Tensor input_gates, Tensor hidden_gates, Tensor hx, Tensor? input_bias, Tensor? hidden_bias) -> (Tensor, Tensor, Tensor, Tensor, Tensor) +inline ::std::tuple _thnn_differentiable_gru_cell_backward(const at::Tensor & grad_hy, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & hx, const ::std::optional & input_bias, const ::std::optional & hidden_bias) { + return at::_ops::_thnn_differentiable_gru_cell_backward::call(grad_hy, input_gates, hidden_gates, hx, input_bias, hidden_bias); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_gru_cell_backward_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_gru_cell_backward_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..af24893321e29ddec6901174c698cc673e1059d9 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_gru_cell_backward_compositeimplicitautograd_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. 
+// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API ::std::tuple _thnn_differentiable_gru_cell_backward(const at::Tensor & grad_hy, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & hx, const ::std::optional & input_bias, const ::std::optional & hidden_bias); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_gru_cell_backward_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_gru_cell_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..fe9de3a045f67aeefe7395939a76a16d5495a2fa --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_gru_cell_backward_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _thnn_differentiable_gru_cell_backward(const at::Tensor & grad_hy, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & hx, const ::std::optional & input_bias, const ::std::optional & hidden_bias); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_gru_cell_backward_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_gru_cell_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..9ca085704c8868dcee03d219f21073cc5bd36e77 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_gru_cell_backward_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _thnn_differentiable_gru_cell_backward { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const ::std::optional &, const ::std::optional &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_thnn_differentiable_gru_cell_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_thnn_differentiable_gru_cell_backward(Tensor grad_hy, Tensor input_gates, Tensor hidden_gates, Tensor hx, Tensor? input_bias, Tensor? 
hidden_bias) -> (Tensor, Tensor, Tensor, Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & grad_hy, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & hx, const ::std::optional & input_bias, const ::std::optional & hidden_bias); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_hy, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & hx, const ::std::optional & input_bias, const ::std::optional & hidden_bias); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_lstm_cell_backward.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_lstm_cell_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..bf6261a1ba913ae5ef2a277f4bb143f8976a7886 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_lstm_cell_backward.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_thnn_differentiable_lstm_cell_backward(Tensor? grad_hy, Tensor? grad_cy, Tensor input_gates, Tensor hidden_gates, Tensor? input_bias, Tensor? hidden_bias, Tensor cx, Tensor cy) -> (Tensor, Tensor, Tensor, Tensor, Tensor) +inline ::std::tuple _thnn_differentiable_lstm_cell_backward(const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const ::std::optional & input_bias, const ::std::optional & hidden_bias, const at::Tensor & cx, const at::Tensor & cy) { + return at::_ops::_thnn_differentiable_lstm_cell_backward::call(grad_hy, grad_cy, input_gates, hidden_gates, input_bias, hidden_bias, cx, cy); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_lstm_cell_backward_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_lstm_cell_backward_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8c0b3d1dd2b77e189b6176961cab112503585d83 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_lstm_cell_backward_compositeimplicitautograd_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API ::std::tuple _thnn_differentiable_lstm_cell_backward(const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const ::std::optional & input_bias, const ::std::optional & hidden_bias, const at::Tensor & cx, const at::Tensor & cy); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_lstm_cell_backward_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_lstm_cell_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..030b239aec669d39f22d4da18d71ab14007f06f3 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_lstm_cell_backward_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _thnn_differentiable_lstm_cell_backward(const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const ::std::optional & input_bias, const ::std::optional & hidden_bias, const at::Tensor & cx, const at::Tensor & cy); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_lstm_cell_backward_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_lstm_cell_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..adfe51139d02a46808a313c6e1f716a93382a3f1 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_differentiable_lstm_cell_backward_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _thnn_differentiable_lstm_cell_backward { + using schema = ::std::tuple (const ::std::optional &, const ::std::optional &, const at::Tensor &, const at::Tensor &, const ::std::optional &, const ::std::optional &, const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_thnn_differentiable_lstm_cell_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_thnn_differentiable_lstm_cell_backward(Tensor? grad_hy, Tensor? grad_cy, Tensor input_gates, Tensor hidden_gates, Tensor? input_bias, Tensor? 
hidden_bias, Tensor cx, Tensor cy) -> (Tensor, Tensor, Tensor, Tensor, Tensor)"; + static ::std::tuple call(const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const ::std::optional & input_bias, const ::std::optional & hidden_bias, const at::Tensor & cx, const at::Tensor & cy); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const ::std::optional & input_bias, const ::std::optional & hidden_bias, const at::Tensor & cx, const at::Tensor & cy); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell.h new file mode 100644 index 0000000000000000000000000000000000000000..edcc5af5fa857aaab75c1047e6b97b067a0ee944 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_thnn_fused_gru_cell(Tensor input_gates, Tensor hidden_gates, Tensor hx, Tensor? input_bias=None, Tensor? hidden_bias=None) -> (Tensor, Tensor) +inline ::std::tuple _thnn_fused_gru_cell(const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & hx, const ::std::optional & input_bias={}, const ::std::optional & hidden_bias={}) { + return at::_ops::_thnn_fused_gru_cell::call(input_gates, hidden_gates, hx, input_bias, hidden_bias); +} + +// aten::_thnn_fused_gru_cell.out(Tensor input_gates, Tensor hidden_gates, Tensor hx, Tensor? input_bias=None, Tensor? hidden_bias=None, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!)) +inline ::std::tuple _thnn_fused_gru_cell_out(at::Tensor & out0, at::Tensor & out1, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & hx, const ::std::optional & input_bias={}, const ::std::optional & hidden_bias={}) { + return at::_ops::_thnn_fused_gru_cell_out::call(input_gates, hidden_gates, hx, input_bias, hidden_bias, out0, out1); +} +// aten::_thnn_fused_gru_cell.out(Tensor input_gates, Tensor hidden_gates, Tensor hx, Tensor? input_bias=None, Tensor? hidden_bias=None, *, Tensor(a!) out0, Tensor(b!) 
out1) -> (Tensor(a!), Tensor(b!)) +inline ::std::tuple _thnn_fused_gru_cell_outf(const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & hx, const ::std::optional & input_bias, const ::std::optional & hidden_bias, at::Tensor & out0, at::Tensor & out1) { + return at::_ops::_thnn_fused_gru_cell_out::call(input_gates, hidden_gates, hx, input_bias, hidden_bias, out0, out1); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_backward.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..8b631aac7444834925c74e27681a1e99412211c7 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_backward.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_thnn_fused_gru_cell_backward(Tensor grad_hy, Tensor workspace, bool has_bias) -> (Tensor, Tensor, Tensor, Tensor, Tensor) +inline ::std::tuple _thnn_fused_gru_cell_backward(const at::Tensor & grad_hy, const at::Tensor & workspace, bool has_bias) { + return at::_ops::_thnn_fused_gru_cell_backward::call(grad_hy, workspace, has_bias); +} + +// aten::_thnn_fused_gru_cell_backward.out(Tensor grad_hy, Tensor workspace, bool has_bias, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3, Tensor(e!) out4) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!), Tensor(e!)) +inline ::std::tuple _thnn_fused_gru_cell_backward_out(at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, at::Tensor & out3, at::Tensor & out4, const at::Tensor & grad_hy, const at::Tensor & workspace, bool has_bias) { + return at::_ops::_thnn_fused_gru_cell_backward_out::call(grad_hy, workspace, has_bias, out0, out1, out2, out3, out4); +} +// aten::_thnn_fused_gru_cell_backward.out(Tensor grad_hy, Tensor workspace, bool has_bias, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3, Tensor(e!) out4) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!), Tensor(e!)) +inline ::std::tuple _thnn_fused_gru_cell_backward_outf(const at::Tensor & grad_hy, const at::Tensor & workspace, bool has_bias, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, at::Tensor & out3, at::Tensor & out4) { + return at::_ops::_thnn_fused_gru_cell_backward_out::call(grad_hy, workspace, has_bias, out0, out1, out2, out3, out4); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_backward_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_backward_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..23ab0aebfe1fd2e8dcbb2cf85acb2127112818dd --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_backward_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. 
+// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::tuple _thnn_fused_gru_cell_backward_out(at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, at::Tensor & out3, at::Tensor & out4, const at::Tensor & grad_hy, const at::Tensor & workspace, bool has_bias); +TORCH_API ::std::tuple _thnn_fused_gru_cell_backward_outf(const at::Tensor & grad_hy, const at::Tensor & workspace, bool has_bias, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, at::Tensor & out3, at::Tensor & out4); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_backward_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_backward_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3aa6cbed3d28cd50bc7ec2f378d88dcaec666926 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_backward_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple _thnn_fused_gru_cell_backward(const at::Tensor & grad_hy, const at::Tensor & workspace, bool has_bias); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_backward_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..f63ce151b0914f000238c488fb11ba6b8915412a --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_backward_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _thnn_fused_gru_cell_backward_out(const at::Tensor & grad_hy, const at::Tensor & workspace, bool has_bias, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, at::Tensor & out3, at::Tensor & out4); +TORCH_API ::std::tuple _thnn_fused_gru_cell_backward_cuda(const at::Tensor & grad_hy, const at::Tensor & workspace, bool has_bias); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_backward_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..8f5d659e4f1f5f6779db817b75e033b3bcfe3408 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_backward_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _thnn_fused_gru_cell_backward {
+  using schema = ::std::tuple<at::Tensor,at::Tensor,at::Tensor,at::Tensor,at::Tensor> (const at::Tensor &, const at::Tensor &, bool);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_thnn_fused_gru_cell_backward";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_thnn_fused_gru_cell_backward(Tensor grad_hy, Tensor workspace, bool has_bias) -> (Tensor, Tensor, Tensor, Tensor, Tensor)";
+  static ::std::tuple<at::Tensor,at::Tensor,at::Tensor,at::Tensor,at::Tensor> call(const at::Tensor & grad_hy, const at::Tensor & workspace, bool has_bias);
+  static ::std::tuple<at::Tensor,at::Tensor,at::Tensor,at::Tensor,at::Tensor> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_hy, const at::Tensor & workspace, bool has_bias);
+};
+
+struct TORCH_API _thnn_fused_gru_cell_backward_out {
+  using schema = ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &,at::Tensor &,at::Tensor &> (const at::Tensor &, const at::Tensor &, bool, at::Tensor &, at::Tensor &, at::Tensor &, at::Tensor &, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_thnn_fused_gru_cell_backward";
+  static constexpr const char* overload_name = "out";
+  static constexpr const char* schema_str = "_thnn_fused_gru_cell_backward.out(Tensor grad_hy, Tensor workspace, bool has_bias, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3, Tensor(e!) out4) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!), Tensor(e!))";
+  static ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &,at::Tensor &,at::Tensor &> call(const at::Tensor & grad_hy, const at::Tensor & workspace, bool has_bias, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, at::Tensor & out3, at::Tensor & out4);
+  static ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &,at::Tensor &,at::Tensor &> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_hy, const at::Tensor & workspace, bool has_bias, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, at::Tensor & out3, at::Tensor & out4);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..bf76eb1a4939e1cf01d0f2efdbd042e39cc0e25b
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
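The declarations below show torchgen's two spellings of the same out-variant: `_out` takes the output tensors first, `_outf` takes them last, and both land on the `aten::_thnn_fused_gru_cell.out` overload. A minimal call-site sketch, assuming a CUDA build; the shapes and the 3*H gate width are the conventional GRU layout, not stated in this diff:

    #include <ATen/ATen.h>

    int64_t B = 4, H = 16;
    auto input_gates  = at::randn({B, 3 * H}, at::kCUDA);  // illustrative shapes
    auto hidden_gates = at::randn({B, 3 * H}, at::kCUDA);
    auto hx           = at::randn({B, H}, at::kCUDA);
    auto opts = hx.options();
    // Out tensors may be resized by the kernel; start empty.
    at::Tensor out0 = at::empty({0}, opts), out1 = at::empty({0}, opts);
    // Equivalent calls, outputs-first and outputs-last:
    at::_thnn_fused_gru_cell_out(out0, out1, input_gates, hidden_gates, hx);
    at::_thnn_fused_gru_cell_outf(input_gates, hidden_gates, hx, {}, {}, out0, out1);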
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::tuple _thnn_fused_gru_cell_out(at::Tensor & out0, at::Tensor & out1, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & hx, const ::std::optional & input_bias={}, const ::std::optional & hidden_bias={}); +TORCH_API ::std::tuple _thnn_fused_gru_cell_outf(const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & hx, const ::std::optional & input_bias, const ::std::optional & hidden_bias, at::Tensor & out0, at::Tensor & out1); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ffeeaf5c3cd8aea2dfe667c000c572376d174799 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple _thnn_fused_gru_cell(const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & hx, const ::std::optional & input_bias={}, const ::std::optional & hidden_bias={}); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_native.h new file mode 100644 index 0000000000000000000000000000000000000000..fa55b88d1de4034682381a769db798673d36bad3 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _thnn_fused_gru_cell_out(const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & hx, const ::std::optional & input_bias, const ::std::optional & hidden_bias, at::Tensor & out0, at::Tensor & out1); +TORCH_API ::std::tuple _thnn_fused_gru_cell_cuda(const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & hx, const ::std::optional & input_bias={}, const ::std::optional & hidden_bias={}); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..ddddb1e663172bbb8ec8abb24d2788f7acfd9c58 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _thnn_fused_gru_cell {
+  using schema = ::std::tuple<at::Tensor,at::Tensor> (const at::Tensor &, const at::Tensor &, const at::Tensor &, const ::std::optional<at::Tensor> &, const ::std::optional<at::Tensor> &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_thnn_fused_gru_cell";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_thnn_fused_gru_cell(Tensor input_gates, Tensor hidden_gates, Tensor hx, Tensor? input_bias=None, Tensor? hidden_bias=None) -> (Tensor, Tensor)";
+  static ::std::tuple<at::Tensor,at::Tensor> call(const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & hx, const ::std::optional<at::Tensor> & input_bias, const ::std::optional<at::Tensor> & hidden_bias);
+  static ::std::tuple<at::Tensor,at::Tensor> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & hx, const ::std::optional<at::Tensor> & input_bias, const ::std::optional<at::Tensor> & hidden_bias);
+};
+
+struct TORCH_API _thnn_fused_gru_cell_out {
+  using schema = ::std::tuple<at::Tensor &,at::Tensor &> (const at::Tensor &, const at::Tensor &, const at::Tensor &, const ::std::optional<at::Tensor> &, const ::std::optional<at::Tensor> &, at::Tensor &, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_thnn_fused_gru_cell";
+  static constexpr const char* overload_name = "out";
+  static constexpr const char* schema_str = "_thnn_fused_gru_cell.out(Tensor input_gates, Tensor hidden_gates, Tensor hx, Tensor? input_bias=None, Tensor? hidden_bias=None, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))";
+  static ::std::tuple<at::Tensor &,at::Tensor &> call(const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & hx, const ::std::optional<at::Tensor> & input_bias, const ::std::optional<at::Tensor> & hidden_bias, at::Tensor & out0, at::Tensor & out1);
+  static ::std::tuple<at::Tensor &,at::Tensor &> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & hx, const ::std::optional<at::Tensor> & input_bias, const ::std::optional<at::Tensor> & hidden_bias, at::Tensor & out0, at::Tensor & out1);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell.h
new file mode 100644
index 0000000000000000000000000000000000000000..8e72b712e616c9d97af60e8aef7675a920153151
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+
+#include <ATen/ops/_thnn_fused_lstm_cell_ops.h>
+
+namespace at {
+
+
+// aten::_thnn_fused_lstm_cell(Tensor input_gates, Tensor hidden_gates, Tensor cx, Tensor? input_bias=None, Tensor?
hidden_bias=None) -> (Tensor, Tensor, Tensor) +inline ::std::tuple _thnn_fused_lstm_cell(const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & cx, const ::std::optional & input_bias={}, const ::std::optional & hidden_bias={}) { + return at::_ops::_thnn_fused_lstm_cell::call(input_gates, hidden_gates, cx, input_bias, hidden_bias); +} + +// aten::_thnn_fused_lstm_cell.out(Tensor input_gates, Tensor hidden_gates, Tensor cx, Tensor? input_bias=None, Tensor? hidden_bias=None, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!)) +inline ::std::tuple _thnn_fused_lstm_cell_out(at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & cx, const ::std::optional & input_bias={}, const ::std::optional & hidden_bias={}) { + return at::_ops::_thnn_fused_lstm_cell_out::call(input_gates, hidden_gates, cx, input_bias, hidden_bias, out0, out1, out2); +} +// aten::_thnn_fused_lstm_cell.out(Tensor input_gates, Tensor hidden_gates, Tensor cx, Tensor? input_bias=None, Tensor? hidden_bias=None, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!)) +inline ::std::tuple _thnn_fused_lstm_cell_outf(const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & cx, const ::std::optional & input_bias, const ::std::optional & hidden_bias, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2) { + return at::_ops::_thnn_fused_lstm_cell_out::call(input_gates, hidden_gates, cx, input_bias, hidden_bias, out0, out1, out2); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..e795cffe70823a37595bff52f844668c55b06e09 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_thnn_fused_lstm_cell_backward(Tensor? grad_hy, Tensor? 
grad_cy, Tensor cx, Tensor cy, Tensor workspace, bool has_bias) -> (Tensor, Tensor, Tensor, Tensor, Tensor) +inline ::std::tuple _thnn_fused_lstm_cell_backward(const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & cx, const at::Tensor & cy, const at::Tensor & workspace, bool has_bias) { + return at::_ops::_thnn_fused_lstm_cell_backward::call(grad_hy, grad_cy, cx, cy, workspace, has_bias); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f533934115bb127cfad1bfac2578c6ba40f253d8 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_compositeimplicitautograd_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API ::std::tuple _thnn_fused_lstm_cell_backward(const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & cx, const at::Tensor & cy, const at::Tensor & workspace, bool has_bias); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_impl.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..ca34d455d0edb2a198d0d786c18964217d19cac2 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_impl.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_thnn_fused_lstm_cell_backward_impl(Tensor? grad_hy, Tensor? grad_cy, Tensor cx, Tensor cy, Tensor workspace, bool has_bias) -> (Tensor, Tensor, Tensor) +inline ::std::tuple _thnn_fused_lstm_cell_backward_impl(const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & cx, const at::Tensor & cy, const at::Tensor & workspace, bool has_bias) { + return at::_ops::_thnn_fused_lstm_cell_backward_impl::call(grad_hy, grad_cy, cx, cy, workspace, has_bias); +} + +// aten::_thnn_fused_lstm_cell_backward_impl.out(Tensor? grad_hy, Tensor? grad_cy, Tensor cx, Tensor cy, Tensor workspace, bool has_bias, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) 
out2) -> (Tensor(a!), Tensor(b!), Tensor(c!)) +inline ::std::tuple _thnn_fused_lstm_cell_backward_impl_out(at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & cx, const at::Tensor & cy, const at::Tensor & workspace, bool has_bias) { + return at::_ops::_thnn_fused_lstm_cell_backward_impl_out::call(grad_hy, grad_cy, cx, cy, workspace, has_bias, out0, out1, out2); +} +// aten::_thnn_fused_lstm_cell_backward_impl.out(Tensor? grad_hy, Tensor? grad_cy, Tensor cx, Tensor cy, Tensor workspace, bool has_bias, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!)) +inline ::std::tuple _thnn_fused_lstm_cell_backward_impl_outf(const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & cx, const at::Tensor & cy, const at::Tensor & workspace, bool has_bias, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2) { + return at::_ops::_thnn_fused_lstm_cell_backward_impl_out::call(grad_hy, grad_cy, cx, cy, workspace, has_bias, out0, out1, out2); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_impl_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_impl_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..e4934c90e5aac1ef8ebcbe20e40e638c56cefcc1 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_impl_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
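The impl variant above returns three tensors where the composite `_thnn_fused_lstm_cell_backward` earlier in this diff returns five. A hedged sketch of how the five-output form could be assembled from the impl, assuming the impl's outputs are (grad_gates, grad_cx, grad_bias) and that the fused kernel shares one gates gradient and one bias gradient between the input and hidden sides; the names and that reuse are assumptions, not taken from this diff:

    // grad_hy, grad_cy, cx, cy, workspace: assumed already in scope as CUDA tensors.
    auto [grad_gates, grad_cx, grad_bias] = at::_thnn_fused_lstm_cell_backward_impl(
        grad_hy, grad_cy, cx, cy, workspace, /*has_bias=*/true);
    // (grad_input_gates, grad_hidden_gates, grad_cx, grad_input_bias, grad_hidden_bias)
    auto five = std::make_tuple(grad_gates, grad_gates, grad_cx, grad_bias, grad_bias);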
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::tuple _thnn_fused_lstm_cell_backward_impl_out(at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & cx, const at::Tensor & cy, const at::Tensor & workspace, bool has_bias); +TORCH_API ::std::tuple _thnn_fused_lstm_cell_backward_impl_outf(const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & cx, const at::Tensor & cy, const at::Tensor & workspace, bool has_bias, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_impl_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_impl_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ef7efba87d34da1449c3f18db06ecf8c1cf645ba --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_impl_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple _thnn_fused_lstm_cell_backward_impl(const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & cx, const at::Tensor & cy, const at::Tensor & workspace, bool has_bias); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_impl_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_impl_native.h new file mode 100644 index 0000000000000000000000000000000000000000..8c1a81745e68d36ac41a0485e799952465ae48e2 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_impl_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _thnn_fused_lstm_cell_backward_impl_out(const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & cx, const at::Tensor & cy, const at::Tensor & workspace, bool has_bias, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); +TORCH_API ::std::tuple _thnn_fused_lstm_cell_backward_impl_cuda(const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & cx, const at::Tensor & cy, const at::Tensor & workspace, bool has_bias); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_impl_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_impl_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..3bd91c5021c99badfbaad671772e360784540e93 --- /dev/null +++ 
b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_impl_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _thnn_fused_lstm_cell_backward_impl { + using schema = ::std::tuple (const ::std::optional &, const ::std::optional &, const at::Tensor &, const at::Tensor &, const at::Tensor &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_thnn_fused_lstm_cell_backward_impl"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_thnn_fused_lstm_cell_backward_impl(Tensor? grad_hy, Tensor? grad_cy, Tensor cx, Tensor cy, Tensor workspace, bool has_bias) -> (Tensor, Tensor, Tensor)"; + static ::std::tuple call(const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & cx, const at::Tensor & cy, const at::Tensor & workspace, bool has_bias); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & cx, const at::Tensor & cy, const at::Tensor & workspace, bool has_bias); +}; + +struct TORCH_API _thnn_fused_lstm_cell_backward_impl_out { + using schema = ::std::tuple (const ::std::optional &, const ::std::optional &, const at::Tensor &, const at::Tensor &, const at::Tensor &, bool, at::Tensor &, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_thnn_fused_lstm_cell_backward_impl"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_thnn_fused_lstm_cell_backward_impl.out(Tensor? grad_hy, Tensor? grad_cy, Tensor cx, Tensor cy, Tensor workspace, bool has_bias, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) 
out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))"; + static ::std::tuple call(const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & cx, const at::Tensor & cy, const at::Tensor & workspace, bool has_bias, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & cx, const at::Tensor & cy, const at::Tensor & workspace, bool has_bias, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..aa9d47ac6170cc8d34adf8126ad410fd3902c804 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _thnn_fused_lstm_cell_backward(const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & cx, const at::Tensor & cy, const at::Tensor & workspace, bool has_bias); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..5de573fc931ed13a21fcbd08d6978d00b0631fb5 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_backward_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _thnn_fused_lstm_cell_backward { + using schema = ::std::tuple (const ::std::optional &, const ::std::optional &, const at::Tensor &, const at::Tensor &, const at::Tensor &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_thnn_fused_lstm_cell_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_thnn_fused_lstm_cell_backward(Tensor? grad_hy, Tensor? 
grad_cy, Tensor cx, Tensor cy, Tensor workspace, bool has_bias) -> (Tensor, Tensor, Tensor, Tensor, Tensor)";
+  static ::std::tuple<at::Tensor,at::Tensor,at::Tensor,at::Tensor,at::Tensor> call(const ::std::optional<at::Tensor> & grad_hy, const ::std::optional<at::Tensor> & grad_cy, const at::Tensor & cx, const at::Tensor & cy, const at::Tensor & workspace, bool has_bias);
+  static ::std::tuple<at::Tensor,at::Tensor,at::Tensor,at::Tensor,at::Tensor> redispatch(c10::DispatchKeySet dispatchKeySet, const ::std::optional<at::Tensor> & grad_hy, const ::std::optional<at::Tensor> & grad_cy, const at::Tensor & cx, const at::Tensor & cy, const at::Tensor & workspace, bool has_bias);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..7a54f856f309710ab98c1a79cee6b17a0f6376b7
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> _thnn_fused_lstm_cell_out(at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & cx, const ::std::optional<at::Tensor> & input_bias={}, const ::std::optional<at::Tensor> & hidden_bias={});
+TORCH_API ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> _thnn_fused_lstm_cell_outf(const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & cx, const ::std::optional<at::Tensor> & input_bias, const ::std::optional<at::Tensor> & hidden_bias, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2);
+
+} // namespace compositeexplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..eb6770ab96e5ec11c79aeaa953429763f31032a4
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_cuda_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
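The CUDA-only declaration below is a reminder that `_thnn_fused_lstm_cell` does not do the matrix multiplies itself: callers pass in precomputed gate pre-activations, and the fused kernel only applies the gate nonlinearities and the cell update. A usage sketch, assuming a CUDA build and the conventional 4*H gate layout; shapes are illustrative, not taken from this diff:

    #include <ATen/ATen.h>

    int64_t B = 8, I = 32, H = 64;
    auto x    = at::randn({B, I}, at::kCUDA);
    auto hx   = at::randn({B, H}, at::kCUDA);
    auto cx   = at::randn({B, H}, at::kCUDA);
    auto w_ih = at::randn({4 * H, I}, at::kCUDA);
    auto w_hh = at::randn({4 * H, H}, at::kCUDA);
    // Precompute the gate pre-activations, then let the fused kernel finish the cell.
    auto input_gates  = at::matmul(x, w_ih.t());
    auto hidden_gates = at::matmul(hx, w_hh.t());
    // Returns (hy, cy, workspace); the workspace is kept for the fused backward.
    auto [hy, cy, workspace] = at::_thnn_fused_lstm_cell(input_gates, hidden_gates, cx);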
+#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple _thnn_fused_lstm_cell(const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & cx, const ::std::optional & input_bias={}, const ::std::optional & hidden_bias={}); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_native.h new file mode 100644 index 0000000000000000000000000000000000000000..614b44a1afe5b1b400a196233f26a1d55e02c9a3 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _thnn_fused_lstm_cell_out(const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & cx, const ::std::optional & input_bias, const ::std::optional & hidden_bias, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); +TORCH_API ::std::tuple _thnn_fused_lstm_cell_cuda(const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & cx, const ::std::optional & input_bias={}, const ::std::optional & hidden_bias={}); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..b40581f1c6632b2846727707267bda81c81cfe09 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_thnn_fused_lstm_cell_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _thnn_fused_lstm_cell { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, const ::std::optional &, const ::std::optional &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_thnn_fused_lstm_cell"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_thnn_fused_lstm_cell(Tensor input_gates, Tensor hidden_gates, Tensor cx, Tensor? input_bias=None, Tensor? 
hidden_bias=None) -> (Tensor, Tensor, Tensor)";
+  static ::std::tuple<at::Tensor,at::Tensor,at::Tensor> call(const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & cx, const ::std::optional<at::Tensor> & input_bias, const ::std::optional<at::Tensor> & hidden_bias);
+  static ::std::tuple<at::Tensor,at::Tensor,at::Tensor> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & cx, const ::std::optional<at::Tensor> & input_bias, const ::std::optional<at::Tensor> & hidden_bias);
+};
+
+struct TORCH_API _thnn_fused_lstm_cell_out {
+  using schema = ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> (const at::Tensor &, const at::Tensor &, const at::Tensor &, const ::std::optional<at::Tensor> &, const ::std::optional<at::Tensor> &, at::Tensor &, at::Tensor &, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_thnn_fused_lstm_cell";
+  static constexpr const char* overload_name = "out";
+  static constexpr const char* schema_str = "_thnn_fused_lstm_cell.out(Tensor input_gates, Tensor hidden_gates, Tensor cx, Tensor? input_bias=None, Tensor? hidden_bias=None, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))";
+  static ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> call(const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & cx, const ::std::optional<at::Tensor> & input_bias, const ::std::optional<at::Tensor> & hidden_bias, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2);
+  static ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & cx, const ::std::optional<at::Tensor> & input_bias, const ::std::optional<at::Tensor> & hidden_bias, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_copy.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_copy.h
new file mode 100644
index 0000000000000000000000000000000000000000..2831a42ba4d1b15e267e46e1124541bd5dcef141
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_copy.h
@@ -0,0 +1,44 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+
+#include <ATen/ops/_to_copy_ops.h>
+
+namespace at {
+
+
+// aten::_to_copy(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool non_blocking=False, MemoryFormat? memory_format=None) -> Tensor
+inline at::Tensor _to_copy(const at::Tensor & self, at::TensorOptions options={}, bool non_blocking=false, ::std::optional<at::MemoryFormat> memory_format=::std::nullopt) {
+  return at::_ops::_to_copy::call(self, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), non_blocking, c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format));
+}
+// aten::_to_copy(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool non_blocking=False, MemoryFormat? memory_format=None) -> Tensor
+inline at::Tensor _to_copy(const at::Tensor & self, ::std::optional<at::ScalarType> dtype, ::std::optional<at::Layout> layout, ::std::optional<at::Device> device, ::std::optional<bool> pin_memory, bool non_blocking, ::std::optional<at::MemoryFormat> memory_format) {
+  return at::_ops::_to_copy::call(self, dtype, layout, device, pin_memory, non_blocking, memory_format);
+}
+
+// aten::_to_copy.out(Tensor self, *, bool non_blocking=False, MemoryFormat?
memory_format=None, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _to_copy_out(at::Tensor & out, const at::Tensor & self, bool non_blocking=false, ::std::optional memory_format=::std::nullopt) { + return at::_ops::_to_copy_out::call(self, non_blocking, memory_format, out); +} +// aten::_to_copy.out(Tensor self, *, bool non_blocking=False, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _to_copy_outf(const at::Tensor & self, bool non_blocking, ::std::optional memory_format, at::Tensor & out) { + return at::_ops::_to_copy_out::call(self, non_blocking, memory_format, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_copy_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_copy_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..be8f79fbb10683f9b29f8f02fe30112266180453 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_copy_compositeexplicitautograd_dispatch.h @@ -0,0 +1,26 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor _to_copy(const at::Tensor & self, at::TensorOptions options={}, bool non_blocking=false, ::std::optional memory_format=::std::nullopt); +TORCH_API at::Tensor _to_copy(const at::Tensor & self, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, bool non_blocking, ::std::optional memory_format); +TORCH_API at::Tensor & _to_copy_out(at::Tensor & out, const at::Tensor & self, bool non_blocking=false, ::std::optional memory_format=::std::nullopt); +TORCH_API at::Tensor & _to_copy_outf(const at::Tensor & self, bool non_blocking, ::std::optional memory_format, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_copy_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_copy_native.h new file mode 100644 index 0000000000000000000000000000000000000000..ed5b0880ad79a6f87d5845ccb5ba1718ade4e305 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_copy_native.h @@ -0,0 +1,23 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _to_copy(const at::Tensor & self, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}, bool non_blocking=false, ::std::optional memory_format=::std::nullopt); +TORCH_API at::Tensor & _to_copy_out(const at::Tensor & self, bool non_blocking, ::std::optional memory_format, at::Tensor & out); +TORCH_API at::Tensor _to_copy_nested(const at::Tensor & self, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}, bool non_blocking=false, ::std::optional memory_format=::std::nullopt); 
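Before the namespace closes, it is worth noting that the `TensorOptions` overload of `at::_to_copy` declared earlier in this diff is the functional core of `Tensor::to()`: unlike `to()`, it always materializes a new tensor. A small sketch, with illustrative values only:

    #include <ATen/ATen.h>

    auto x = at::randn({2, 3});
    // Copies even when dtype and device already match the request.
    auto y = at::_to_copy(x, x.options().dtype(at::kHalf), /*non_blocking=*/false);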
+} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_copy_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_copy_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..38369920dc02d744cf8c1c284993dbf929f5e175 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_copy_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _to_copy { + using schema = at::Tensor (const at::Tensor &, ::std::optional, ::std::optional, ::std::optional, ::std::optional, bool, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_to_copy"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_to_copy(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool non_blocking=False, MemoryFormat? memory_format=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, bool non_blocking, ::std::optional memory_format); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, bool non_blocking, ::std::optional memory_format); +}; + +struct TORCH_API _to_copy_out { + using schema = at::Tensor & (const at::Tensor &, bool, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_to_copy"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_to_copy.out(Tensor self, *, bool non_blocking=False, MemoryFormat? memory_format=None, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, bool non_blocking, ::std::optional memory_format, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, bool non_blocking, ::std::optional memory_format, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_cpu.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_cpu.h new file mode 100644 index 0000000000000000000000000000000000000000..e548bf4c843ab3d0a2ec8a114141ce7a156adee6 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_cpu.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_to_cpu(Tensor[] tensors) -> Tensor[] +inline ::std::vector _to_cpu(at::TensorList tensors) { + return at::_ops::_to_cpu::call(tensors); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_cpu_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_cpu_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..56fc450ccb50b1f87c090b56838d3ed304414182 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_cpu_compositeimplicitautograd_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API ::std::vector _to_cpu(at::TensorList tensors); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_cpu_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_cpu_native.h new file mode 100644 index 0000000000000000000000000000000000000000..08bcf057617b66b5cc8f6981d4ac44fc7cc1c75f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_cpu_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::vector _to_cpu(at::TensorList tensors); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_cpu_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_cpu_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..93af2542c39a94590511f2ba825ff74c3ed39835 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_cpu_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. 
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _to_cpu {
+  using schema = ::std::vector<at::Tensor> (at::TensorList);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_to_cpu";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_to_cpu(Tensor[] tensors) -> Tensor[]";
+  static ::std::vector<at::Tensor> call(at::TensorList tensors);
+  static ::std::vector<at::Tensor> redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList tensors);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_dense.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_dense.h
new file mode 100644
index 0000000000000000000000000000000000000000..73e3f52cb164ede82673d5fc95a560d7d7851eb4
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_dense.h
@@ -0,0 +1,35 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+
+#include <ATen/ops/_to_dense_ops.h>
+
+namespace at {
+
+
+// aten::_to_dense.out(Tensor self, ScalarType? dtype=None, bool? masked_grad=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _to_dense_out(at::Tensor & out, const at::Tensor & self, ::std::optional<at::ScalarType> dtype=::std::nullopt, ::std::optional<bool> masked_grad=::std::nullopt) {
+  return at::_ops::_to_dense_out::call(self, dtype, masked_grad, out);
+}
+// aten::_to_dense.out(Tensor self, ScalarType? dtype=None, bool? masked_grad=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _to_dense_outf(const at::Tensor & self, ::std::optional<at::ScalarType> dtype, ::std::optional<bool> masked_grad, at::Tensor & out) {
+  return at::_ops::_to_dense_out::call(self, dtype, masked_grad, out);
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_dense_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_dense_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..41e91dcdb482444515f066fe820aa58030b6c3ce
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_dense_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
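The native header that follows makes the dispatch fan-out visible: the single `aten::_to_dense` schema is backed by `sparse_to_dense`, `sparse_compressed_to_dense`, and `mkldnn_to_dense`, chosen by the input's layout. A quick sketch of reaching the sparse kernel through the public API:

    #include <ATen/ATen.h>

    auto dense_in  = at::eye(3);            // strided tensor
    auto sparse    = dense_in.to_sparse();  // COO layout
    auto dense_out = sparse.to_dense();     // routes through aten::_to_dense to sparse_to_dense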
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _to_dense_out(at::Tensor & out, const at::Tensor & self, ::std::optional dtype=::std::nullopt, ::std::optional masked_grad=::std::nullopt); +TORCH_API at::Tensor & _to_dense_outf(const at::Tensor & self, ::std::optional dtype, ::std::optional masked_grad, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_dense_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_dense_native.h new file mode 100644 index 0000000000000000000000000000000000000000..1c258d7d2e54ff404c7f5552a3eda5aac971aaf0 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_dense_native.h @@ -0,0 +1,24 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _to_dense_out(const at::Tensor & self, ::std::optional dtype, ::std::optional masked_grad, at::Tensor & out); +TORCH_API at::Tensor sparse_to_dense(const at::Tensor & self, ::std::optional dtype=::std::nullopt, ::std::optional masked_grad=::std::nullopt); +TORCH_API at::Tensor sparse_compressed_to_dense(const at::Tensor & self, ::std::optional dtype=::std::nullopt, ::std::optional masked_grad=::std::nullopt); +TORCH_API at::Tensor mkldnn_to_dense(const at::Tensor & self, ::std::optional dtype=::std::nullopt, ::std::optional masked_grad=::std::nullopt); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_dense_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_dense_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..87e43e20752f1d587727b1c689c3dcf593326832 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_dense_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _to_dense { + using schema = at::Tensor (const at::Tensor &, ::std::optional, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_to_dense"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_to_dense(Tensor self, ScalarType? dtype=None, bool? masked_grad=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, ::std::optional dtype, ::std::optional masked_grad); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional dtype, ::std::optional masked_grad); +}; + +struct TORCH_API _to_dense_out { + using schema = at::Tensor & (const at::Tensor &, ::std::optional, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_to_dense"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_to_dense.out(Tensor self, ScalarType? dtype=None, bool? 
masked_grad=None, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, ::std::optional dtype, ::std::optional masked_grad, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional dtype, ::std::optional masked_grad, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse.h new file mode 100644 index 0000000000000000000000000000000000000000..1f550d64442cbc331490726dcf8567de9f149b9e --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse.h @@ -0,0 +1,44 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_to_sparse.sparse_dim_out(Tensor self, int sparse_dim, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _to_sparse_out(at::Tensor & out, const at::Tensor & self, int64_t sparse_dim) { + return at::_ops::_to_sparse_sparse_dim_out::call(self, sparse_dim, out); +} +// aten::_to_sparse.sparse_dim_out(Tensor self, int sparse_dim, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _to_sparse_outf(const at::Tensor & self, int64_t sparse_dim, at::Tensor & out) { + return at::_ops::_to_sparse_sparse_dim_out::call(self, sparse_dim, out); +} + +// aten::_to_sparse.out(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _to_sparse_out(at::Tensor & out, const at::Tensor & self, ::std::optional layout=::std::nullopt, at::OptionalIntArrayRef blocksize=::std::nullopt, ::std::optional dense_dim=::std::nullopt) { + return at::_ops::_to_sparse_out::call(self, layout, blocksize, dense_dim, out); +} +// aten::_to_sparse.out(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _to_sparse_outf(const at::Tensor & self, ::std::optional layout, at::OptionalIntArrayRef blocksize, ::std::optional dense_dim, at::Tensor & out) { + return at::_ops::_to_sparse_out::call(self, layout, blocksize, dense_dim, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsc.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsc.h new file mode 100644 index 0000000000000000000000000000000000000000..f3b1b8ce0ce31a16ab8365103745706f7dd6fdb6 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsc.h @@ -0,0 +1,35 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_to_sparse_bsc.out(Tensor self, int[2] blocksize, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _to_sparse_bsc_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt) { + return at::_ops::_to_sparse_bsc_out::call(self, blocksize, dense_dim, out); +} +// aten::_to_sparse_bsc.out(Tensor self, int[2] blocksize, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & _to_sparse_bsc_outf(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim, at::Tensor & out) { + return at::_ops::_to_sparse_bsc_out::call(self, blocksize, dense_dim, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsc_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsc_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..13560fbd4e7a5c0bf826233aa2d99803ff43d50c --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsc_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _to_sparse_bsc_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt); +TORCH_API at::Tensor & _to_sparse_bsc_outf(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsc_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsc_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ac6aaedfe8fc8cef0b8194b2acf9c68ca7758fa9 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsc_cpu_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _to_sparse_bsc(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsc_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsc_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..daf04dcd0e28207e0640fd85daaf9e6633223e76 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsc_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _to_sparse_bsc(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsc_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsc_native.h new file mode 100644 index 0000000000000000000000000000000000000000..fc3ff2c92e52f94ce499b1af0ab099619cae23b9 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsc_native.h @@ -0,0 +1,24 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _to_sparse_bsc_out(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim, at::Tensor & out); +TORCH_API at::Tensor dense_to_sparse_bsc(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt); +TORCH_API at::Tensor coo_to_sparse_bsc(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt); +TORCH_API at::Tensor sparse_compressed_to_sparse_bsc(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsc_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsc_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..b54f4725f4a98ddd9f7560f544f1be823b095d37 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsc_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _to_sparse_bsc { + using schema = at::Tensor (const at::Tensor &, at::IntArrayRef, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_to_sparse_bsc"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_to_sparse_bsc(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim); +}; + +struct TORCH_API _to_sparse_bsc_out { + using schema = at::Tensor & (const at::Tensor &, at::IntArrayRef, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_to_sparse_bsc"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_to_sparse_bsc.out(Tensor self, int[2] blocksize, int? dense_dim=None, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsr.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsr.h new file mode 100644 index 0000000000000000000000000000000000000000..966b5990fc63c326bd51269079c8fadf98a064fe --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsr.h @@ -0,0 +1,35 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_to_sparse_bsr.out(Tensor self, int[2] blocksize, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _to_sparse_bsr_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt) { + return at::_ops::_to_sparse_bsr_out::call(self, blocksize, dense_dim, out); +} +// aten::_to_sparse_bsr.out(Tensor self, int[2] blocksize, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _to_sparse_bsr_outf(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim, at::Tensor & out) { + return at::_ops::_to_sparse_bsr_out::call(self, blocksize, dense_dim, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsr_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsr_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..40ba94d39c9a8f0192f80d67d192770db50bb158 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsr_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
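+// Editorial note (not from torchgen): the paired wrappers in _to_sparse_bsr.h
+// above follow the usual convention -- `*_out` takes the destination tensor
+// first, `*_outf` takes it last, and both forward to the same _ops::call.
+// A hedged usage sketch; `dense` and `result` are illustrative names only:
+//
+//   at::Tensor dense = at::rand({4, 4});
+//   at::Tensor result = at::empty({0});
+//   at::_to_sparse_bsr_out(result, dense, /*blocksize=*/{2, 2});
+//   at::_to_sparse_bsr_outf(dense, {2, 2}, ::std::nullopt, result);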
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _to_sparse_bsr_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt); +TORCH_API at::Tensor & _to_sparse_bsr_outf(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsr_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsr_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..a32eba42a5fdf302b346bd25645c1e41d4c06fe3 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsr_cpu_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _to_sparse_bsr(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsr_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsr_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..0cc4c0cb0e274a9d4c9265a0d41a81f4fa752a20 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsr_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
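+// Editorial note (not from torchgen): the *_cpu_dispatch.h / *_cuda_dispatch.h
+// pair exposes the backend kernel directly under at::cpu / at::cuda, bypassing
+// dispatcher routing. A hedged sketch; `d` is an illustrative name:
+//
+//   at::Tensor d = at::rand({4, 4}, at::kCUDA);
+//   at::Tensor bsr = at::cuda::_to_sparse_bsr(d, /*blocksize=*/{2, 2});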
+#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _to_sparse_bsr(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsr_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsr_native.h new file mode 100644 index 0000000000000000000000000000000000000000..954dca3632574b7182c79e0f472b8d17fbabc36b --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsr_native.h @@ -0,0 +1,24 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _to_sparse_bsr_out(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim, at::Tensor & out); +TORCH_API at::Tensor dense_to_sparse_bsr(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt); +TORCH_API at::Tensor coo_to_sparse_bsr(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt); +TORCH_API at::Tensor sparse_compressed_to_sparse_bsr(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsr_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsr_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..3759f4c5eb11c749358837727755f7479aaa50c3 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_bsr_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _to_sparse_bsr { + using schema = at::Tensor (const at::Tensor &, at::IntArrayRef, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_to_sparse_bsr"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_to_sparse_bsr(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim); +}; + +struct TORCH_API _to_sparse_bsr_out { + using schema = at::Tensor & (const at::Tensor &, at::IntArrayRef, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_to_sparse_bsr"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_to_sparse_bsr.out(Tensor self, int[2] blocksize, int? dense_dim=None, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..c0b91869340d55facec4f65ed4c5a17e7ed45bdb --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_compositeexplicitautograd_dispatch.h @@ -0,0 +1,26 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _to_sparse_out(at::Tensor & out, const at::Tensor & self, int64_t sparse_dim); +TORCH_API at::Tensor & _to_sparse_outf(const at::Tensor & self, int64_t sparse_dim, at::Tensor & out); +TORCH_API at::Tensor & _to_sparse_out(at::Tensor & out, const at::Tensor & self, ::std::optional layout=::std::nullopt, at::OptionalIntArrayRef blocksize=::std::nullopt, ::std::optional dense_dim=::std::nullopt); +TORCH_API at::Tensor & _to_sparse_outf(const at::Tensor & self, ::std::optional layout, at::OptionalIntArrayRef blocksize, ::std::optional dense_dim, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8b28c36055049a19cf809bac5c8f9ccd507e61ea --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_cpu_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
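+// Editorial note (not from torchgen): two overloads of _to_sparse follow --
+// sparse_dim selects a COO result with that many sparse dimensions, while the
+// layout/blocksize/dense_dim form targets a compressed layout. A hedged
+// sketch, with `d` illustrative:
+//
+//   at::Tensor d = at::eye(4);
+//   at::Tensor coo = at::cpu::_to_sparse(d, /*sparse_dim=*/2);
+//   at::Tensor csr = at::cpu::_to_sparse(d, at::kSparseCsr);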
+#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _to_sparse(const at::Tensor & self, int64_t sparse_dim); +TORCH_API at::Tensor _to_sparse(const at::Tensor & self, ::std::optional layout=::std::nullopt, at::OptionalIntArrayRef blocksize=::std::nullopt, ::std::optional dense_dim=::std::nullopt); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csc.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csc.h new file mode 100644 index 0000000000000000000000000000000000000000..7bd23237b450f8851da9f9c8df8ae7e3b28a5465 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csc.h @@ -0,0 +1,35 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_to_sparse_csc.out(Tensor self, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _to_sparse_csc_out(at::Tensor & out, const at::Tensor & self, ::std::optional dense_dim=::std::nullopt) { + return at::_ops::_to_sparse_csc_out::call(self, dense_dim, out); +} +// aten::_to_sparse_csc.out(Tensor self, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _to_sparse_csc_outf(const at::Tensor & self, ::std::optional dense_dim, at::Tensor & out) { + return at::_ops::_to_sparse_csc_out::call(self, dense_dim, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csc_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csc_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..5f49cd2267344a3925aae632d50fcd1536b49f9f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csc_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
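+// Editorial note (not from torchgen): call-shape sketch only -- the two CSC
+// out variants declared below mirror the functional op plus a destination.
+// Whether an undefined `out` is resized here is not shown in this diff, so
+// the setup and names are illustrative assumptions:
+//
+//   at::Tensor out = at::empty({0});
+//   at::_to_sparse_csc_outf(at::eye(3), /*dense_dim=*/::std::nullopt, out);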
+#include
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API at::Tensor & _to_sparse_csc_out(at::Tensor & out, const at::Tensor & self, ::std::optional<int64_t> dense_dim=::std::nullopt);
+TORCH_API at::Tensor & _to_sparse_csc_outf(const at::Tensor & self, ::std::optional<int64_t> dense_dim, at::Tensor & out);
+
+} // namespace compositeexplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csc_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csc_cpu_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..eb7cae514a501d3c9955e3c6d4d18e5af8513095
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csc_cpu_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+
+namespace cpu {
+
+TORCH_API at::Tensor _to_sparse_csc(const at::Tensor & self, ::std::optional<int64_t> dense_dim=::std::nullopt);
+
+} // namespace cpu
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csc_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csc_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..428c97da0701fcc19ef1a027ca7c1475674b3c06
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csc_cuda_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
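+// Editorial note (not from torchgen): dense_dim is the number of trailing
+// dimensions kept dense in the compressed result; ::std::nullopt means the
+// default of zero. A hedged sketch, `batch` illustrative:
+//
+//   at::Tensor batch = at::rand({3, 3, 8}, at::kCUDA);
+//   at::Tensor csc = at::cuda::_to_sparse_csc(batch, /*dense_dim=*/1);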
+#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _to_sparse_csc(const at::Tensor & self, ::std::optional dense_dim=::std::nullopt); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csc_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csc_native.h new file mode 100644 index 0000000000000000000000000000000000000000..a1133ff1372f3fd4891810e46b9bc354ee703ba0 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csc_native.h @@ -0,0 +1,24 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _to_sparse_csc_out(const at::Tensor & self, ::std::optional dense_dim, at::Tensor & out); +TORCH_API at::Tensor dense_to_sparse_csc(const at::Tensor & self, ::std::optional dense_dim=::std::nullopt); +TORCH_API at::Tensor coo_to_sparse_csc(const at::Tensor & self, ::std::optional dense_dim=::std::nullopt); +TORCH_API at::Tensor sparse_compressed_to_sparse_csc(const at::Tensor & self, ::std::optional dense_dim=::std::nullopt); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csc_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csc_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..acdb662ecbc3cf00cc7be0f52e21a315d443d0e1 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csc_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _to_sparse_csc { + using schema = at::Tensor (const at::Tensor &, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_to_sparse_csc"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_to_sparse_csc(Tensor self, int? dense_dim=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, ::std::optional dense_dim); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional dense_dim); +}; + +struct TORCH_API _to_sparse_csc_out { + using schema = at::Tensor & (const at::Tensor &, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_to_sparse_csc"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_to_sparse_csc.out(Tensor self, int? dense_dim=None, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, ::std::optional dense_dim, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional dense_dim, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csr.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csr.h new file mode 100644 index 0000000000000000000000000000000000000000..019b092e7e9a29652c66984871fc8c61eca7de9b --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csr.h @@ -0,0 +1,35 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_to_sparse_csr.out(Tensor self, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _to_sparse_csr_out(at::Tensor & out, const at::Tensor & self, ::std::optional dense_dim=::std::nullopt) { + return at::_ops::_to_sparse_csr_out::call(self, dense_dim, out); +} +// aten::_to_sparse_csr.out(Tensor self, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _to_sparse_csr_outf(const at::Tensor & self, ::std::optional dense_dim, at::Tensor & out) { + return at::_ops::_to_sparse_csr_out::call(self, dense_dim, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csr_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csr_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..136c6ef5ebb8037dbc1119bd2708c18ad7b23500 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csr_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _to_sparse_csr_out(at::Tensor & out, const at::Tensor & self, ::std::optional dense_dim=::std::nullopt); +TORCH_API at::Tensor & _to_sparse_csr_outf(const at::Tensor & self, ::std::optional dense_dim, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csr_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csr_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..aaa0eaced780305226993522b05b303ad44c1887 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csr_cpu_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _to_sparse_csr(const at::Tensor & self, ::std::optional dense_dim=::std::nullopt); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csr_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csr_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f0db3f88096e58bf93f1a01476367a3e9107972e --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csr_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _to_sparse_csr(const at::Tensor & self, ::std::optional dense_dim=::std::nullopt); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csr_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csr_native.h new file mode 100644 index 0000000000000000000000000000000000000000..d160a3047dfd8614b55fff642acd417d202d35f0 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csr_native.h @@ -0,0 +1,24 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _to_sparse_csr_out(const at::Tensor & self, ::std::optional dense_dim, at::Tensor & out); +TORCH_API at::Tensor dense_to_sparse_csr(const at::Tensor & self, ::std::optional dense_dim=::std::nullopt); +TORCH_API at::Tensor coo_to_sparse_csr(const at::Tensor & self, ::std::optional dense_dim=::std::nullopt); +TORCH_API at::Tensor sparse_compressed_to_sparse_csr(const at::Tensor & self, ::std::optional dense_dim=::std::nullopt); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csr_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csr_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..364350950cd8bf1b05d3c97a0e4c76aed1c94a03 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_csr_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
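+// Editorial note (not from torchgen): each at::_ops struct below bundles the
+// operator's name, overload_name and schema_str with static call/redispatch
+// entry points; call() is the unboxed dispatcher entry. A hedged sketch:
+//
+//   at::Tensor csr = at::_ops::_to_sparse_csr::call(at::eye(4), ::std::nullopt);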
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _to_sparse_csr { + using schema = at::Tensor (const at::Tensor &, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_to_sparse_csr"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, ::std::optional dense_dim); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional dense_dim); +}; + +struct TORCH_API _to_sparse_csr_out { + using schema = at::Tensor & (const at::Tensor &, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_to_sparse_csr"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_to_sparse_csr.out(Tensor self, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, ::std::optional dense_dim, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional dense_dim, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ee72554fa000e82a47839e9cbcf3c09683459b24 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_cuda_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
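+// Editorial note (not from torchgen): these declarations repeat the at::cpu
+// signatures earlier in this diff; device-based routing normally happens in
+// the dispatcher, e.g. through the public Tensor method. A hedged sketch:
+//
+//   at::Tensor coo = at::rand({4, 4}, at::kCUDA).to_sparse(/*sparse_dim=*/2);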
+#include
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor _to_sparse(const at::Tensor & self, int64_t sparse_dim);
+TORCH_API at::Tensor _to_sparse(const at::Tensor & self, ::std::optional<at::Layout> layout=::std::nullopt, at::OptionalIntArrayRef blocksize=::std::nullopt, ::std::optional<int64_t> dense_dim=::std::nullopt);
+
+} // namespace cuda
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..ab284c097bd4a1fcd615cdcd130cb22b21a119d6
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_native.h
@@ -0,0 +1,28 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor & _to_sparse_sparse_dim_out(const at::Tensor & self, int64_t sparse_dim, at::Tensor & out);
+TORCH_API at::Tensor dense_to_sparse(const at::Tensor & self, int64_t sparse_dim);
+TORCH_API at::Tensor sparse_coo_to_sparse(const at::Tensor & self, int64_t sparse_dim);
+TORCH_API at::Tensor sparse_compressed_to_sparse(const at::Tensor & self, int64_t sparse_dim);
+TORCH_API at::Tensor & _to_sparse_out(const at::Tensor & self, ::std::optional<at::Layout> layout, at::OptionalIntArrayRef blocksize, ::std::optional<int64_t> dense_dim, at::Tensor & out);
+TORCH_API at::Tensor dense_to_sparse(const at::Tensor & self, ::std::optional<at::Layout> layout=::std::nullopt, at::OptionalIntArrayRef blocksize=::std::nullopt, ::std::optional<int64_t> dense_dim=::std::nullopt);
+TORCH_API at::Tensor sparse_coo_to_sparse(const at::Tensor & self, ::std::optional<at::Layout> layout=::std::nullopt, at::OptionalIntArrayRef blocksize=::std::nullopt, ::std::optional<int64_t> dense_dim=::std::nullopt);
+TORCH_API at::Tensor sparse_compressed_to_sparse(const at::Tensor & self, ::std::optional<at::Layout> layout=::std::nullopt, at::OptionalIntArrayRef blocksize=::std::nullopt, ::std::optional<int64_t> dense_dim=::std::nullopt);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..f6950aa38495a1b753034b7b8c5bbca72f65604c
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_ops.h
@@ -0,0 +1,62 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
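+// Editorial note (not from torchgen): the redispatch() members declared below
+// take an explicit c10::DispatchKeySet so wrapper kernels (autograd, tracing)
+// can re-enter the dispatcher below their own key. A hedged sketch of the
+// pattern, assuming a kernel already holding a keyset `ks`:
+//
+//   at::_ops::_to_sparse_sparse_dim::redispatch(
+//       ks & c10::after_autograd_keyset, self, sparse_dim);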
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _to_sparse_sparse_dim { + using schema = at::Tensor (const at::Tensor &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_to_sparse"; + static constexpr const char* overload_name = "sparse_dim"; + static constexpr const char* schema_str = "_to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor"; + static at::Tensor call(const at::Tensor & self, int64_t sparse_dim); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t sparse_dim); +}; + +struct TORCH_API _to_sparse { + using schema = at::Tensor (const at::Tensor &, ::std::optional, at::OptionalIntArrayRef, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_to_sparse"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, ::std::optional layout, at::OptionalIntArrayRef blocksize, ::std::optional dense_dim); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional layout, at::OptionalIntArrayRef blocksize, ::std::optional dense_dim); +}; + +struct TORCH_API _to_sparse_sparse_dim_out { + using schema = at::Tensor & (const at::Tensor &, int64_t, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_to_sparse"; + static constexpr const char* overload_name = "sparse_dim_out"; + static constexpr const char* schema_str = "_to_sparse.sparse_dim_out(Tensor self, int sparse_dim, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, int64_t sparse_dim, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t sparse_dim, at::Tensor & out); +}; + +struct TORCH_API _to_sparse_out { + using schema = at::Tensor & (const at::Tensor &, ::std::optional, at::OptionalIntArrayRef, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_to_sparse"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_to_sparse.out(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, ::std::optional layout, at::OptionalIntArrayRef blocksize, ::std::optional dense_dim, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional layout, at::OptionalIntArrayRef blocksize, ::std::optional dense_dim, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_semi_structured.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_semi_structured.h new file mode 100644 index 0000000000000000000000000000000000000000..216a736e5442ff2526c059ebd163a36e606d0c67 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_semi_structured.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_to_sparse_semi_structured(Tensor dense) -> (Tensor, Tensor) +inline ::std::tuple _to_sparse_semi_structured(const at::Tensor & dense) { + return at::_ops::_to_sparse_semi_structured::call(dense); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_semi_structured_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_semi_structured_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..42f9aa8b070577d9c104ffb0a71bf20c913273c3 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_semi_structured_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
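+// Editorial note (not from torchgen): unlike the layout conversions above,
+// this op returns a pair of Tensors (per its schema, the compressed tensor
+// and its metadata), and only a CUDA dispatch header appears in this diff.
+// A hedged sketch, `dense_half` illustrative:
+//
+//   auto [compressed, meta] = at::_to_sparse_semi_structured(dense_half);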
+#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple _to_sparse_semi_structured(const at::Tensor & dense); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_semi_structured_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_semi_structured_native.h new file mode 100644 index 0000000000000000000000000000000000000000..5eb932b7c6fbfa6161b24c8e309f91d24531a280 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_semi_structured_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _to_sparse_semi_structured(const at::Tensor & dense); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_semi_structured_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_semi_structured_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..46f8362ea8c35929fb76a10e295caa5ef8ae8764 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_to_sparse_semi_structured_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _to_sparse_semi_structured { + using schema = ::std::tuple (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_to_sparse_semi_structured"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_to_sparse_semi_structured(Tensor dense) -> (Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & dense); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & dense); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_transform_bias_rescale_qkv.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transform_bias_rescale_qkv.h new file mode 100644 index 0000000000000000000000000000000000000000..c40b3b05f1af48a2f2726c5f3fd0c18ce587d1d0 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transform_bias_rescale_qkv.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_transform_bias_rescale_qkv(Tensor qkv, Tensor qkv_bias, int num_heads) -> (Tensor, Tensor, Tensor) +inline ::std::tuple _transform_bias_rescale_qkv(const at::Tensor & qkv, const at::Tensor & qkv_bias, int64_t num_heads) { + return at::_ops::_transform_bias_rescale_qkv::call(qkv, qkv_bias, num_heads); +} + +// aten::_transform_bias_rescale_qkv.out(Tensor qkv, Tensor qkv_bias, int num_heads, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) 
out2) -> (Tensor(a!), Tensor(b!), Tensor(c!)) +inline ::std::tuple _transform_bias_rescale_qkv_out(at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, const at::Tensor & qkv, const at::Tensor & qkv_bias, int64_t num_heads) { + return at::_ops::_transform_bias_rescale_qkv_out::call(qkv, qkv_bias, num_heads, out0, out1, out2); +} +// aten::_transform_bias_rescale_qkv.out(Tensor qkv, Tensor qkv_bias, int num_heads, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!)) +inline ::std::tuple _transform_bias_rescale_qkv_outf(const at::Tensor & qkv, const at::Tensor & qkv_bias, int64_t num_heads, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2) { + return at::_ops::_transform_bias_rescale_qkv_out::call(qkv, qkv_bias, num_heads, out0, out1, out2); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_transform_bias_rescale_qkv_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transform_bias_rescale_qkv_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..adeea58fc26e8c548050204ed4e6f292bc7cbc52 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transform_bias_rescale_qkv_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::tuple _transform_bias_rescale_qkv_out(at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, const at::Tensor & qkv, const at::Tensor & qkv_bias, int64_t num_heads); +TORCH_API ::std::tuple _transform_bias_rescale_qkv_outf(const at::Tensor & qkv, const at::Tensor & qkv_bias, int64_t num_heads, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_transform_bias_rescale_qkv_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transform_bias_rescale_qkv_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..92b9691ce1ad0cb7fc358f49981f40e9ccc4886a --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transform_bias_rescale_qkv_cpu_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
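+// Editorial note (not from torchgen): the schema earlier in this diff returns
+// three Tensors -- commonly read as the q/k/v projections after the fused
+// bias add and rescale. A hedged sketch, names illustrative:
+//
+//   auto [q, k, v] = at::_transform_bias_rescale_qkv(qkv, qkv_bias, num_heads);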
+#include + +namespace at { + +namespace cpu { + +TORCH_API ::std::tuple _transform_bias_rescale_qkv(const at::Tensor & qkv, const at::Tensor & qkv_bias, int64_t num_heads); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_transform_bias_rescale_qkv_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transform_bias_rescale_qkv_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..9d414773d7fb2d6187c13914aadb90d7080804d5 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transform_bias_rescale_qkv_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple _transform_bias_rescale_qkv(const at::Tensor & qkv, const at::Tensor & qkv_bias, int64_t num_heads); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_transform_bias_rescale_qkv_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transform_bias_rescale_qkv_native.h new file mode 100644 index 0000000000000000000000000000000000000000..989012a7bb106e44fb6078a09066b6b0ad6f7380 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transform_bias_rescale_qkv_native.h @@ -0,0 +1,23 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _transform_bias_rescale_qkv_out(const at::Tensor & qkv, const at::Tensor & qkv_bias, int64_t num_heads, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); +TORCH_API ::std::tuple transform_bias_rescale_qkv_cpu(const at::Tensor & qkv, const at::Tensor & qkv_bias, int64_t num_heads); +TORCH_API ::std::tuple transform_bias_rescale_qkv_cuda(const at::Tensor & qkv, const at::Tensor & qkv_bias, int64_t num_heads); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_transform_bias_rescale_qkv_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transform_bias_rescale_qkv_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..914f07c6407bbc73904859204dde98faa4e6dcce --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transform_bias_rescale_qkv_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
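+// Editorial note (not from torchgen): the out overload declared below threads
+// three destinations (out0/out1/out2) after the regular arguments, mirroring
+// the (Tensor, Tensor, Tensor) return. A hedged call-shape sketch:
+//
+//   at::_ops::_transform_bias_rescale_qkv_out::call(
+//       qkv, qkv_bias, num_heads, out0, out1, out2);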
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _transform_bias_rescale_qkv { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_transform_bias_rescale_qkv"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_transform_bias_rescale_qkv(Tensor qkv, Tensor qkv_bias, int num_heads) -> (Tensor, Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & qkv, const at::Tensor & qkv_bias, int64_t num_heads); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & qkv, const at::Tensor & qkv_bias, int64_t num_heads); +}; + +struct TORCH_API _transform_bias_rescale_qkv_out { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, int64_t, at::Tensor &, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_transform_bias_rescale_qkv"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_transform_bias_rescale_qkv.out(Tensor qkv, Tensor qkv_bias, int num_heads, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))"; + static ::std::tuple call(const at::Tensor & qkv, const at::Tensor & qkv_bias, int64_t num_heads, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & qkv, const at::Tensor & qkv_bias, int64_t num_heads, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_transformer_encoder_layer_fwd.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transformer_encoder_layer_fwd.h new file mode 100644 index 0000000000000000000000000000000000000000..b339006ca8cf9fe639781fcacb57d0a206a7cacc --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transformer_encoder_layer_fwd.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_transformer_encoder_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, int? 
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_transformer_encoder_layer_fwd.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transformer_encoder_layer_fwd.h
new file mode 100644
index 0000000000000000000000000000000000000000..b339006ca8cf9fe639781fcacb57d0a206a7cacc
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transformer_encoder_layer_fwd.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+
+#include
+
+namespace at {
+
+
+// aten::_transformer_encoder_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, int? mask_type=None) -> Tensor
+inline at::Tensor _transformer_encoder_layer_fwd(const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const ::std::optional<at::Tensor> & mask={}, ::std::optional<int64_t> mask_type=::std::nullopt) {
+    return at::_ops::_transformer_encoder_layer_fwd::call(src, embed_dim, num_heads, qkv_weight, qkv_bias, proj_weight, proj_bias, use_gelu, norm_first, eps, norm_weight_1, norm_bias_1, norm_weight_2, norm_bias_2, ffn_weight_1, ffn_bias_1, ffn_weight_2, ffn_bias_2, mask, mask_type);
+}
+
+// aten::_transformer_encoder_layer_fwd.out(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, int? mask_type=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _transformer_encoder_layer_fwd_out(at::Tensor & out, const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const ::std::optional<at::Tensor> & mask={}, ::std::optional<int64_t> mask_type=::std::nullopt) {
+    return at::_ops::_transformer_encoder_layer_fwd_out::call(src, embed_dim, num_heads, qkv_weight, qkv_bias, proj_weight, proj_bias, use_gelu, norm_first, eps, norm_weight_1, norm_bias_1, norm_weight_2, norm_bias_2, ffn_weight_1, ffn_bias_1, ffn_weight_2, ffn_bias_2, mask, mask_type, out);
+}
+// aten::_transformer_encoder_layer_fwd.out(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, int? mask_type=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _transformer_encoder_layer_fwd_outf(const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const ::std::optional<at::Tensor> & mask, ::std::optional<int64_t> mask_type, at::Tensor & out) {
+    return at::_ops::_transformer_encoder_layer_fwd_out::call(src, embed_dim, num_heads, qkv_weight, qkv_bias, proj_weight, proj_bias, use_gelu, norm_first, eps, norm_weight_1, norm_bias_1, norm_weight_2, norm_bias_2, ffn_weight_1, ffn_bias_1, ffn_weight_2, ffn_bias_2, mask, mask_type, out);
+}
+
+}
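A hedged sketch of calling the fused encoder-layer forward declared above (illustrative only; the tensor sizes and the row-major weight layout are assumptions, not taken from this header):

#include <ATen/ATen.h>

// Sketch: one fused transformer encoder layer; F is an assumed FFN width.
at::Tensor demo_encoder_layer() {
  const int64_t B = 2, T = 16, E = 256, H = 8, F = 1024;
  at::Tensor src = at::randn({B, T, E});
  at::Tensor qkv_w = at::randn({3 * E, E}), qkv_b = at::randn({3 * E});
  at::Tensor proj_w = at::randn({E, E}), proj_b = at::randn({E});
  at::Tensor ln_w = at::ones({E}), ln_b = at::zeros({E});
  at::Tensor ffn_w1 = at::randn({F, E}), ffn_b1 = at::randn({F});
  at::Tensor ffn_w2 = at::randn({E, F}), ffn_b2 = at::randn({E});
  // mask/mask_type are left at their defaults (no mask).
  return at::_transformer_encoder_layer_fwd(
      src, E, H, qkv_w, qkv_b, proj_w, proj_b,
      /*use_gelu=*/true, /*norm_first=*/false, /*eps=*/1e-5,
      ln_w, ln_b, ln_w, ln_b, ffn_w1, ffn_b1, ffn_w2, ffn_b2);
}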
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_transformer_encoder_layer_fwd_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transformer_encoder_layer_fwd_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..6ea5fa0af0bbd0fbf313b8d51ed4b933735fc5a0
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transformer_encoder_layer_fwd_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API at::Tensor & _transformer_encoder_layer_fwd_out(at::Tensor & out, const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const ::std::optional<at::Tensor> & mask={}, ::std::optional<int64_t> mask_type=::std::nullopt);
+TORCH_API at::Tensor & _transformer_encoder_layer_fwd_outf(const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const ::std::optional<at::Tensor> & mask, ::std::optional<int64_t> mask_type, at::Tensor & out);
+
+} // namespace compositeexplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_transformer_encoder_layer_fwd_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transformer_encoder_layer_fwd_cpu_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..bccf3d4b482a78a3225c3d07b2487ff2a5a5793e
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transformer_encoder_layer_fwd_cpu_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+
+namespace cpu {
+
+TORCH_API at::Tensor _transformer_encoder_layer_fwd(const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const ::std::optional<at::Tensor> & mask={}, ::std::optional<int64_t> mask_type=::std::nullopt);
+
+} // namespace cpu
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_transformer_encoder_layer_fwd_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transformer_encoder_layer_fwd_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..c86330d7e5b75e15bd6a6bd0e6084edfb85068e0
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transformer_encoder_layer_fwd_cuda_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor _transformer_encoder_layer_fwd(const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const ::std::optional<at::Tensor> & mask={}, ::std::optional<int64_t> mask_type=::std::nullopt);
+
+} // namespace cuda
+} // namespace at
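These per-backend headers expose the kernel directly in a dispatch-key namespace (at::cpu, at::cuda), bypassing the dispatcher. A hedged sketch, demonstrated with the much smaller _unique2 op whose headers appear later in this diff (the encoder op takes eighteen arguments); the aggregate header name is an assumption:

#include <ATen/ATen.h>
#include <ATen/CPUFunctions.h>  // assumed aggregate header exposing the at::cpu namespace

void demo_dispatch_namespaces() {
  at::Tensor x = at::randint(0, 4, {8});
  // Dispatcher-routed call:
  auto [v1, inv1, cnt1] = at::_unique2(x, /*sorted=*/true, /*return_inverse=*/true, /*return_counts=*/true);
  // Direct CPU-kernel call, skipping dispatch; the caller must guarantee a CPU tensor:
  auto [v2, inv2, cnt2] = at::cpu::_unique2(x, true, true, true);
}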
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_transformer_encoder_layer_fwd_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transformer_encoder_layer_fwd_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..b39aa8abdee36743c411d8ddf324445c3fec83da
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transformer_encoder_layer_fwd_native.h
@@ -0,0 +1,22 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor & _transformer_encoder_layer_fwd_out(const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const ::std::optional<at::Tensor> & mask, ::std::optional<int64_t> mask_type, at::Tensor & out);
+TORCH_API at::Tensor transformer_encoder_layer_forward(const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const ::std::optional<at::Tensor> & mask={}, ::std::optional<int64_t> mask_type=::std::nullopt);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_transformer_encoder_layer_fwd_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transformer_encoder_layer_fwd_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..94856a3c69f9d6ec67eb06758e713181d9984d7e
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_transformer_encoder_layer_fwd_ops.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _transformer_encoder_layer_fwd {
+  using schema = at::Tensor (const at::Tensor &, int64_t, int64_t, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, bool, bool, double, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const ::std::optional<at::Tensor> &, ::std::optional<int64_t>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_transformer_encoder_layer_fwd";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_transformer_encoder_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, int? mask_type=None) -> Tensor";
+  static at::Tensor call(const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const ::std::optional<at::Tensor> & mask, ::std::optional<int64_t> mask_type);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const ::std::optional<at::Tensor> & mask, ::std::optional<int64_t> mask_type);
+};
+
+struct TORCH_API _transformer_encoder_layer_fwd_out {
+  using schema = at::Tensor & (const at::Tensor &, int64_t, int64_t, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, bool, bool, double, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const ::std::optional<at::Tensor> &, ::std::optional<int64_t>, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_transformer_encoder_layer_fwd";
+  static constexpr const char* overload_name = "out";
+  static constexpr const char* schema_str = "_transformer_encoder_layer_fwd.out(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, int? mask_type=None, *, Tensor(a!) out) -> Tensor(a!)";
+  static at::Tensor & call(const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const ::std::optional<at::Tensor> & mask, ::std::optional<int64_t> mask_type, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & src, int64_t embed_dim, int64_t num_heads, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, bool use_gelu, bool norm_first, double eps, const at::Tensor & norm_weight_1, const at::Tensor & norm_bias_1, const at::Tensor & norm_weight_2, const at::Tensor & norm_bias_2, const at::Tensor & ffn_weight_1, const at::Tensor & ffn_bias_1, const at::Tensor & ffn_weight_2, const at::Tensor & ffn_bias_2, const ::std::optional<at::Tensor> & mask, ::std::optional<int64_t> mask_type, at::Tensor & out);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_trilinear.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_trilinear.h
new file mode 100644
index 0000000000000000000000000000000000000000..d5a8ab545bb7a7b8cfbfd27967ecb192a6ec78ec
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_trilinear.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+
+#include
+
+namespace at {
+
+
+// aten::_trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor
+inline at::Tensor _trilinear(const at::Tensor & i1, const at::Tensor & i2, const at::Tensor & i3, at::IntArrayRef expand1, at::IntArrayRef expand2, at::IntArrayRef expand3, at::IntArrayRef sumdim, int64_t unroll_dim=1) {
+    return at::_ops::_trilinear::call(i1, i2, i3, expand1, expand2, expand3, sumdim, unroll_dim);
+}
+
+// aten::_trilinear.out(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _trilinear_out(at::Tensor & out, const at::Tensor & i1, const at::Tensor & i2, const at::Tensor & i3, at::IntArrayRef expand1, at::IntArrayRef expand2, at::IntArrayRef expand3, at::IntArrayRef sumdim, int64_t unroll_dim=1) {
+    return at::_ops::_trilinear_out::call(i1, i2, i3, expand1, expand2, expand3, sumdim, unroll_dim, out);
+}
+// aten::_trilinear.out(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _trilinear_outf(const at::Tensor & i1, const at::Tensor & i2, const at::Tensor & i3, at::IntArrayRef expand1, at::IntArrayRef expand2, at::IntArrayRef expand3, at::IntArrayRef sumdim, int64_t unroll_dim, at::Tensor & out) {
+    return at::_ops::_trilinear_out::call(i1, i2, i3, expand1, expand2, expand3, sumdim, unroll_dim, out);
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_trilinear_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_trilinear_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..a0595b09183faaa27c176179cded7cfcb9e7f51a
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_trilinear_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API at::Tensor & _trilinear_out(at::Tensor & out, const at::Tensor & i1, const at::Tensor & i2, const at::Tensor & i3, at::IntArrayRef expand1, at::IntArrayRef expand2, at::IntArrayRef expand3, at::IntArrayRef sumdim, int64_t unroll_dim=1);
+TORCH_API at::Tensor & _trilinear_outf(const at::Tensor & i1, const at::Tensor & i2, const at::Tensor & i3, at::IntArrayRef expand1, at::IntArrayRef expand2, at::IntArrayRef expand3, at::IntArrayRef sumdim, int64_t unroll_dim, at::Tensor & out);
+
+} // namespace compositeexplicitautograd
+} // namespace at
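_trilinear is the generalized three-way product used underneath at::bilinear: each input is unsqueezed at the positions listed in its expand* argument, the three are broadcast-multiplied, and sumdim selects axes of the product to reduce. A hedged sketch; the shape reasoning is my reading of the schema, not taken from this header:

#include <ATen/ATen.h>

void demo_trilinear() {
  at::Tensor a = at::randn({3}), b = at::randn({3}), c = at::randn({3});
  // Unsqueeze a at dims {1,2}, b at {0,2}, c at {0,1}; with no summed dims the
  // broadcast product is a 3x3x3 "outer product" of the three vectors.
  at::Tensor outer = at::_trilinear(a, b, c, {1, 2}, {0, 2}, {0, 1}, /*sumdim=*/{});
  // Passing sumdim={0} additionally reduces the product over dimension 0.
  at::Tensor reduced = at::_trilinear(a, b, c, {1, 2}, {0, 2}, {0, 1}, {0});
}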
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_trilinear_compositeexplicitautogradnonfunctional_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_trilinear_compositeexplicitautogradnonfunctional_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..3176da1ce5bddc20ae210e126300f165dd45c29b
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_trilinear_compositeexplicitautogradnonfunctional_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+
+namespace compositeexplicitautogradnonfunctional {
+
+TORCH_API at::Tensor _trilinear(const at::Tensor & i1, const at::Tensor & i2, const at::Tensor & i3, at::IntArrayRef expand1, at::IntArrayRef expand2, at::IntArrayRef expand3, at::IntArrayRef sumdim, int64_t unroll_dim=1);
+
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_trilinear_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_trilinear_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..9a8385b17bdf32df9f5d1abcf44f131073485bc7
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_trilinear_native.h
@@ -0,0 +1,22 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor & _trilinear_out(const at::Tensor & i1, const at::Tensor & i2, const at::Tensor & i3, at::IntArrayRef expand1, at::IntArrayRef expand2, at::IntArrayRef expand3, at::IntArrayRef sumdim, int64_t unroll_dim, at::Tensor & out);
+TORCH_API at::Tensor _trilinear(const at::Tensor & i1, const at::Tensor & i2, const at::Tensor & i3, at::IntArrayRef expand1, at::IntArrayRef expand2, at::IntArrayRef expand3, at::IntArrayRef sumdim, int64_t unroll_dim=1);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_trilinear_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_trilinear_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..b671ae857eb18d2f898797f0e578e2e6bd098cb6
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_trilinear_ops.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _trilinear {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, int64_t);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_trilinear";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor";
+  static at::Tensor call(const at::Tensor & i1, const at::Tensor & i2, const at::Tensor & i3, at::IntArrayRef expand1, at::IntArrayRef expand2, at::IntArrayRef expand3, at::IntArrayRef sumdim, int64_t unroll_dim);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & i1, const at::Tensor & i2, const at::Tensor & i3, at::IntArrayRef expand1, at::IntArrayRef expand2, at::IntArrayRef expand3, at::IntArrayRef sumdim, int64_t unroll_dim);
+};
+
+struct TORCH_API _trilinear_out {
+  using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, const at::Tensor &, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, int64_t, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_trilinear";
+  static constexpr const char* overload_name = "out";
+  static constexpr const char* schema_str = "_trilinear.out(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1, *, Tensor(a!) out) -> Tensor(a!)";
+  static at::Tensor & call(const at::Tensor & i1, const at::Tensor & i2, const at::Tensor & i3, at::IntArrayRef expand1, at::IntArrayRef expand2, at::IntArrayRef expand3, at::IntArrayRef sumdim, int64_t unroll_dim, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & i1, const at::Tensor & i2, const at::Tensor & i3, at::IntArrayRef expand1, at::IntArrayRef expand2, at::IntArrayRef expand3, at::IntArrayRef sumdim, int64_t unroll_dim, at::Tensor & out);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_multi_head_attention.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_multi_head_attention.h
new file mode 100644
index 0000000000000000000000000000000000000000..bfb7f18e95f924c5525a47a9aebfca757fff485b
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_multi_head_attention.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+
+#include
+
+namespace at {
+
+
+// aten::_triton_multi_head_attention(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None) -> Tensor
+inline at::Tensor _triton_multi_head_attention(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const ::std::optional<at::Tensor> & mask={}) {
+    return at::_ops::_triton_multi_head_attention::call(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask);
+}
+
+// aten::_triton_multi_head_attention.out(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _triton_multi_head_attention_out(at::Tensor & out, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const ::std::optional<at::Tensor> & mask={}) {
+    return at::_ops::_triton_multi_head_attention_out::call(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, out);
+}
+// aten::_triton_multi_head_attention.out(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _triton_multi_head_attention_outf(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const ::std::optional<at::Tensor> & mask, at::Tensor & out) {
+    return at::_ops::_triton_multi_head_attention_out::call(query, key, value, embed_dim, num_head, qkv_weight, qkv_bias, proj_weight, proj_bias, mask, out);
+}
+
+}
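A hedged sketch of the fused Triton multi-head attention entry point above. Per the cuda-only dispatch header that follows, the kernel is registered only for CUDA; device, dtype, and shapes here are assumptions:

#include <ATen/ATen.h>

at::Tensor demo_triton_mha() {
  const int64_t B = 2, T = 32, E = 128, H = 4;
  auto opts = at::TensorOptions().device(at::kCUDA).dtype(at::kHalf);
  at::Tensor q = at::randn({B, T, E}, opts);
  at::Tensor k = at::randn({B, T, E}, opts);
  at::Tensor v = at::randn({B, T, E}, opts);
  at::Tensor qkv_w = at::randn({3 * E, E}, opts), qkv_b = at::randn({3 * E}, opts);
  at::Tensor proj_w = at::randn({E, E}, opts), proj_b = at::randn({E}, opts);
  // mask is left at its default (no mask).
  return at::_triton_multi_head_attention(q, k, v, E, H, qkv_w, qkv_b, proj_w, proj_b);
}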
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_multi_head_attention_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_multi_head_attention_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..bbe22615cb01e3510ef0508eafe5f0929cd9aed5
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_multi_head_attention_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API at::Tensor & _triton_multi_head_attention_out(at::Tensor & out, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const ::std::optional<at::Tensor> & mask={});
+TORCH_API at::Tensor & _triton_multi_head_attention_outf(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const ::std::optional<at::Tensor> & mask, at::Tensor & out);
+
+} // namespace compositeexplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_multi_head_attention_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_multi_head_attention_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..fabc6e6a40794d3bdcdd01eb657752c9590bb4bb
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_multi_head_attention_cuda_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor _triton_multi_head_attention(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const ::std::optional<at::Tensor> & mask={});
+
+} // namespace cuda
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_multi_head_attention_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_multi_head_attention_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..c7adb967da34a44947907acd73720d61c824ae6c
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_multi_head_attention_native.h
@@ -0,0 +1,22 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor & _triton_multi_head_attention_out(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const ::std::optional<at::Tensor> & mask, at::Tensor & out);
+TORCH_API at::Tensor triton_multi_head_attention(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const ::std::optional<at::Tensor> & mask={});
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_multi_head_attention_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_multi_head_attention_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..013852fa9dda5a388528a70964d51d24a31dc449
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_multi_head_attention_ops.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _triton_multi_head_attention {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, int64_t, int64_t, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const ::std::optional<at::Tensor> &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_triton_multi_head_attention";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_triton_multi_head_attention(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None) -> Tensor";
+  static at::Tensor call(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const ::std::optional<at::Tensor> & mask);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const ::std::optional<at::Tensor> & mask);
+};
+
+struct TORCH_API _triton_multi_head_attention_out {
+  using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, const at::Tensor &, int64_t, int64_t, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const ::std::optional<at::Tensor> &, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_triton_multi_head_attention";
+  static constexpr const char* overload_name = "out";
+  static constexpr const char* schema_str = "_triton_multi_head_attention.out(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, *, Tensor(a!) out) -> Tensor(a!)";
+  static at::Tensor & call(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const ::std::optional<at::Tensor> & mask, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const ::std::optional<at::Tensor> & mask, at::Tensor & out);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_scaled_dot_attention.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_scaled_dot_attention.h
new file mode 100644
index 0000000000000000000000000000000000000000..5857b15c039a64a79c6b52aa38568d72eec02df3
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_scaled_dot_attention.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+
+#include
+
+namespace at {
+
+
+// aten::_triton_scaled_dot_attention(Tensor q, Tensor k, Tensor v, float dropout_p=0.0) -> Tensor
+inline at::Tensor _triton_scaled_dot_attention(const at::Tensor & q, const at::Tensor & k, const at::Tensor & v, double dropout_p=0.0) {
+    return at::_ops::_triton_scaled_dot_attention::call(q, k, v, dropout_p);
+}
+
+// aten::_triton_scaled_dot_attention.out(Tensor q, Tensor k, Tensor v, float dropout_p=0.0, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _triton_scaled_dot_attention_out(at::Tensor & out, const at::Tensor & q, const at::Tensor & k, const at::Tensor & v, double dropout_p=0.0) {
+    return at::_ops::_triton_scaled_dot_attention_out::call(q, k, v, dropout_p, out);
+}
+// aten::_triton_scaled_dot_attention.out(Tensor q, Tensor k, Tensor v, float dropout_p=0.0, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _triton_scaled_dot_attention_outf(const at::Tensor & q, const at::Tensor & k, const at::Tensor & v, double dropout_p, at::Tensor & out) {
+    return at::_ops::_triton_scaled_dot_attention_out::call(q, k, v, dropout_p, out);
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_scaled_dot_attention_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_scaled_dot_attention_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..07e2b2b38aee6a8e68ecb5de7c0743743d6d4a3e
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_scaled_dot_attention_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API at::Tensor & _triton_scaled_dot_attention_out(at::Tensor & out, const at::Tensor & q, const at::Tensor & k, const at::Tensor & v, double dropout_p=0.0);
+TORCH_API at::Tensor & _triton_scaled_dot_attention_outf(const at::Tensor & q, const at::Tensor & k, const at::Tensor & v, double dropout_p, at::Tensor & out);
+
+} // namespace compositeexplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_scaled_dot_attention_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_scaled_dot_attention_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..5031f97aeb3a26a87deb0bf3efd08573e5643cb6
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_scaled_dot_attention_cuda_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor _triton_scaled_dot_attention(const at::Tensor & q, const at::Tensor & k, const at::Tensor & v, double dropout_p=0.0);
+
+} // namespace cuda
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_scaled_dot_attention_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_scaled_dot_attention_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..10c72c0876c4934903a2956bb2833fd3b3726694
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_scaled_dot_attention_native.h
@@ -0,0 +1,22 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor & _triton_scaled_dot_attention_out(const at::Tensor & q, const at::Tensor & k, const at::Tensor & v, double dropout_p, at::Tensor & out);
+TORCH_API at::Tensor triton_scaled_dot_attention(const at::Tensor & q, const at::Tensor & k, const at::Tensor & v, double dropout_p=0.0);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_scaled_dot_attention_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_scaled_dot_attention_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..50379b51701a80cdadebea3973e289512e43409f
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_triton_scaled_dot_attention_ops.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _triton_scaled_dot_attention {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, double);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_triton_scaled_dot_attention";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_triton_scaled_dot_attention(Tensor q, Tensor k, Tensor v, float dropout_p=0.0) -> Tensor";
+  static at::Tensor call(const at::Tensor & q, const at::Tensor & k, const at::Tensor & v, double dropout_p);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & q, const at::Tensor & k, const at::Tensor & v, double dropout_p);
+};
+
+struct TORCH_API _triton_scaled_dot_attention_out {
+  using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, const at::Tensor &, double, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_triton_scaled_dot_attention";
+  static constexpr const char* overload_name = "out";
+  static constexpr const char* schema_str = "_triton_scaled_dot_attention.out(Tensor q, Tensor k, Tensor v, float dropout_p=0.0, *, Tensor(a!) out) -> Tensor(a!)";
+  static at::Tensor & call(const at::Tensor & q, const at::Tensor & k, const at::Tensor & v, double dropout_p, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & q, const at::Tensor & k, const at::Tensor & v, double dropout_p, at::Tensor & out);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique.h
new file mode 100644
index 0000000000000000000000000000000000000000..3fca569f876e6a893cfbde34ff446241b9e8cfb9
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+
+#include
+
+namespace at {
+
+
+// aten::_unique(Tensor self, bool sorted=True, bool return_inverse=False) -> (Tensor, Tensor)
+inline ::std::tuple<at::Tensor,at::Tensor> _unique(const at::Tensor & self, bool sorted=true, bool return_inverse=false) {
+    return at::_ops::_unique::call(self, sorted, return_inverse);
+}
+
+// aten::_unique.out(Tensor self, bool sorted=True, bool return_inverse=False, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))
+inline ::std::tuple<at::Tensor &,at::Tensor &> _unique_out(at::Tensor & out0, at::Tensor & out1, const at::Tensor & self, bool sorted=true, bool return_inverse=false) {
+    return at::_ops::_unique_out::call(self, sorted, return_inverse, out0, out1);
+}
+// aten::_unique.out(Tensor self, bool sorted=True, bool return_inverse=False, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))
+inline ::std::tuple<at::Tensor &,at::Tensor &> _unique_outf(const at::Tensor & self, bool sorted, bool return_inverse, at::Tensor & out0, at::Tensor & out1) {
+    return at::_ops::_unique_out::call(self, sorted, return_inverse, out0, out1);
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique2.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique2.h
new file mode 100644
index 0000000000000000000000000000000000000000..bd059506f3310eadc1dfb6f80fadbc741ce859df
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique2.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+
+#include
+
+namespace at {
+
+
+// aten::_unique2(Tensor self, bool sorted=True, bool return_inverse=False, bool return_counts=False) -> (Tensor, Tensor, Tensor)
+inline ::std::tuple<at::Tensor,at::Tensor,at::Tensor> _unique2(const at::Tensor & self, bool sorted=true, bool return_inverse=false, bool return_counts=false) {
+    return at::_ops::_unique2::call(self, sorted, return_inverse, return_counts);
+}
+
+// aten::_unique2.out(Tensor self, bool sorted=True, bool return_inverse=False, bool return_counts=False, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))
+inline ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> _unique2_out(at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, const at::Tensor & self, bool sorted=true, bool return_inverse=false, bool return_counts=false) {
+    return at::_ops::_unique2_out::call(self, sorted, return_inverse, return_counts, out0, out1, out2);
+}
+// aten::_unique2.out(Tensor self, bool sorted=True, bool return_inverse=False, bool return_counts=False, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))
+inline ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> _unique2_outf(const at::Tensor & self, bool sorted, bool return_inverse, bool return_counts, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2) {
+    return at::_ops::_unique2_out::call(self, sorted, return_inverse, return_counts, out0, out1, out2);
+}
+
+}
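_unique is the older two-output variant; _unique2 adds return_counts. The sketch below also shows the _out/_outf pairing used across all of these headers: _out takes the outputs first and keeps defaulted trailing arguments, _outf takes them last with no defaults. Illustrative only:

#include <ATen/ATen.h>

void demo_unique_variants() {
  at::Tensor x = at::randint(0, 4, {8});
  auto [vals, inverse] = at::_unique(x, /*sorted=*/true, /*return_inverse=*/true);
  auto [v2, inv2, counts] = at::_unique2(x, true, true, /*return_counts=*/true);
  // Out-variants; the kernels resize the provided outputs as needed.
  at::Tensor o0 = at::empty({0}, x.options()), o1 = at::empty({0}, at::kLong);
  at::_unique_out(o0, o1, x, true, true);
  at::_unique_outf(x, true, true, o0, o1);
}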
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::tuple _unique2_out(at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, const at::Tensor & self, bool sorted=true, bool return_inverse=false, bool return_counts=false); +TORCH_API ::std::tuple _unique2_outf(const at::Tensor & self, bool sorted, bool return_inverse, bool return_counts, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique2_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique2_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b83d10a026fd6f6cd10e80694f26b9b12414ec83 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique2_cpu_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API ::std::tuple _unique2(const at::Tensor & self, bool sorted=true, bool return_inverse=false, bool return_counts=false); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique2_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique2_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..05fd8067fb186434a002ccaf87f9426a0fc3bf7f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique2_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple _unique2(const at::Tensor & self, bool sorted=true, bool return_inverse=false, bool return_counts=false); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique2_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique2_native.h new file mode 100644 index 0000000000000000000000000000000000000000..5779c3a167900490f5c33d0081b9d352c920764f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique2_native.h @@ -0,0 +1,23 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _unique2_out(const at::Tensor & self, bool sorted, bool return_inverse, bool return_counts, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); +TORCH_API ::std::tuple _unique2_cpu(const at::Tensor & self, bool sorted=true, bool return_inverse=false, bool return_counts=false); +TORCH_API ::std::tuple _unique2_cuda(const at::Tensor & self, bool sorted=true, bool return_inverse=false, bool return_counts=false); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique2_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique2_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..27e8456ba2cb8bcb6b13b10c9c517b2a1ae2255f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique2_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _unique2 { + using schema = ::std::tuple (const at::Tensor &, bool, bool, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_unique2"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_unique2(Tensor self, bool sorted=True, bool return_inverse=False, bool return_counts=False) -> (Tensor, Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & self, bool sorted, bool return_inverse, bool return_counts); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, bool sorted, bool return_inverse, bool return_counts); +}; + +struct TORCH_API _unique2_out { + using schema = ::std::tuple (const at::Tensor &, bool, bool, bool, at::Tensor &, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_unique2"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_unique2.out(Tensor self, bool sorted=True, bool return_inverse=False, bool return_counts=False, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) 
out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))"; + static ::std::tuple call(const at::Tensor & self, bool sorted, bool return_inverse, bool return_counts, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, bool sorted, bool return_inverse, bool return_counts, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..6fc23d05326f4b4306cef1adeb2419291ab6fb47 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::tuple _unique_out(at::Tensor & out0, at::Tensor & out1, const at::Tensor & self, bool sorted=true, bool return_inverse=false); +TORCH_API ::std::tuple _unique_outf(const at::Tensor & self, bool sorted, bool return_inverse, at::Tensor & out0, at::Tensor & out1); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..eec047ac798b7fcf13ca44f2d17a117cfda3cec7 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique_cpu_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cpu { + +TORCH_API ::std::tuple _unique(const at::Tensor & self, bool sorted=true, bool return_inverse=false); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..fcd15e3ca0ed48429411901a848bb34d25bc2b0e --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple _unique(const at::Tensor & self, bool sorted=true, bool return_inverse=false); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique_native.h new file mode 100644 index 0000000000000000000000000000000000000000..e03e3604ac59622e9b0c7f255069f8e8979708ba --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique_native.h @@ -0,0 +1,23 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _unique_out(const at::Tensor & self, bool sorted, bool return_inverse, at::Tensor & out0, at::Tensor & out1); +TORCH_API ::std::tuple _unique_cpu(const at::Tensor & self, bool sorted=true, bool return_inverse=false); +TORCH_API ::std::tuple _unique_cuda(const at::Tensor & self, bool sorted=true, bool return_inverse=false); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..245df647e2452bd438fad9878cddd09709b56a31 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..245df647e2452bd438fad9878cddd09709b56a31
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unique_ops.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <c10/core/SymInt.h>
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _unique {
+  using schema = ::std::tuple<at::Tensor,at::Tensor> (const at::Tensor &, bool, bool);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_unique";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_unique(Tensor self, bool sorted=True, bool return_inverse=False) -> (Tensor, Tensor)";
+  static ::std::tuple<at::Tensor,at::Tensor> call(const at::Tensor & self, bool sorted, bool return_inverse);
+  static ::std::tuple<at::Tensor,at::Tensor> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, bool sorted, bool return_inverse);
+};
+
+struct TORCH_API _unique_out {
+  using schema = ::std::tuple<at::Tensor &,at::Tensor &> (const at::Tensor &, bool, bool, at::Tensor &, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_unique";
+  static constexpr const char* overload_name = "out";
+  static constexpr const char* schema_str = "_unique.out(Tensor self, bool sorted=True, bool return_inverse=False, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))";
+  static ::std::tuple<at::Tensor &,at::Tensor &> call(const at::Tensor & self, bool sorted, bool return_inverse, at::Tensor & out0, at::Tensor & out1);
+  static ::std::tuple<at::Tensor &,at::Tensor &> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, bool sorted, bool return_inverse, at::Tensor & out0, at::Tensor & out1);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unpack_dual.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unpack_dual.h
new file mode 100644
index 0000000000000000000000000000000000000000..2a9f7d23c8e510bfa86cc815398fa5837e316590
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unpack_dual.h
@@ -0,0 +1,31 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/util/OptionalArrayRef.h>
+
+
+
+#include <ATen/ops/_unpack_dual_ops.h>
+
+namespace at {
+
+
+// aten::_unpack_dual(Tensor(a) dual, int level) -> (Tensor(a) primal, Tensor tangent)
+inline ::std::tuple<at::Tensor,at::Tensor> _unpack_dual(const at::Tensor & dual, int64_t level) {
+    return at::_ops::_unpack_dual::call(dual, level);
+}
+
+}
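`_unpack_dual` is the forward-mode-AD primitive that splits a dual tensor into its primal and tangent parts. A minimal sketch (assumes a linked libtorch; for a tensor carrying no tangent at the given level, the tangent comes back undefined):

#include <ATen/ATen.h>

int main() {
  at::Tensor t = at::randn({3});
  // No forward gradient has been attached to t, so primal is just t
  // and tangent is an undefined tensor.
  auto [primal, tangent] = at::_unpack_dual(t, /*level=*/0);
  bool has_tangent = tangent.defined();  // false here
  return has_tangent ? 1 : 0;
}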
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unpack_dual_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unpack_dual_compositeimplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..9d21b772457d83cf676e2aecf270c87238a67517
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unpack_dual_compositeimplicitautograd_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeimplicitautograd {
+
+TORCH_API ::std::tuple<at::Tensor,at::Tensor> _unpack_dual(const at::Tensor & dual, int64_t level);
+
+} // namespace compositeimplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unpack_dual_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unpack_dual_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..de63fc91a01443bb2f05a6d9c2207ad0af49a173
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unpack_dual_native.h
@@ -0,0 +1,21 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API ::std::tuple<at::Tensor,at::Tensor> _unpack_dual(const at::Tensor & dual, int64_t level);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unpack_dual_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unpack_dual_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..1a10d612a282a9098638d4dd1fd5c0bf6306ecee
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unpack_dual_ops.h
@@ -0,0 +1,29 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <c10/core/SymInt.h>
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _unpack_dual {
+  using schema = ::std::tuple<at::Tensor,at::Tensor> (const at::Tensor &, int64_t);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_unpack_dual";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_unpack_dual(Tensor(a) dual, int level) -> (Tensor(a) primal, Tensor tangent)";
+  static ::std::tuple<at::Tensor,at::Tensor> call(const at::Tensor & dual, int64_t level);
+  static ::std::tuple<at::Tensor,at::Tensor> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & dual, int64_t level);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index.h
new file mode 100644
index 0000000000000000000000000000000000000000..834c6a4ef3aee2f893dd2d2fda55479393fb752d
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index.h
@@ -0,0 +1,31 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/util/OptionalArrayRef.h>
+
+
+
+#include <ATen/ops/_unsafe_index_ops.h>
+
+namespace at {
+
+
+// aten::_unsafe_index.Tensor(Tensor self, Tensor?[] indices) -> Tensor
+inline at::Tensor _unsafe_index(const at::Tensor & self, const c10::List<::std::optional<at::Tensor>> & indices) {
+    return at::_ops::_unsafe_index_Tensor::call(self, indices);
+}
+
+}
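The `Tensor?[] indices` argument in the schema surfaces in C++ as `c10::List<::std::optional<at::Tensor>>`, with an empty optional standing in for a full slice. A minimal sketch of calling the wrapper above (illustrative values; `_unsafe_index` behaves like `at::index` but the caller vouches that all indices are in bounds):

#include <ATen/ATen.h>

int main() {
  at::Tensor self = at::arange(6).reshape({2, 3});
  c10::List<::std::optional<at::Tensor>> indices;
  indices.push_back(::std::optional<at::Tensor>(at::arange(2)));  // index dim 0
  indices.push_back(::std::optional<at::Tensor>());               // keep dim 1 whole
  at::Tensor picked = at::_unsafe_index(self, indices);
  return 0;
}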
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..a33cc648e140439313199bd50268f4db7b44faec
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API at::Tensor _unsafe_index(const at::Tensor & self, const c10::List<::std::optional<at::Tensor>> & indices);
+
+} // namespace compositeexplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..833983c98b85ce431252fa6e5f73c738603ad368
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index_native.h
@@ -0,0 +1,21 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor _unsafe_index(const at::Tensor & self, const c10::List<::std::optional<at::Tensor>> & indices);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..1e30beabdc416824b21a616325976c8a56502c5c
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index_ops.h
@@ -0,0 +1,29 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <c10/core/SymInt.h>
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _unsafe_index_Tensor {
+  using schema = at::Tensor (const at::Tensor &, const c10::List<::std::optional<at::Tensor>> &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_unsafe_index";
+  static constexpr const char* overload_name = "Tensor";
+  static constexpr const char* schema_str = "_unsafe_index.Tensor(Tensor self, Tensor?[] indices) -> Tensor";
+  static at::Tensor call(const at::Tensor & self, const c10::List<::std::optional<at::Tensor>> & indices);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const c10::List<::std::optional<at::Tensor>> & indices);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index_put.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index_put.h
new file mode 100644
index 0000000000000000000000000000000000000000..9d792bdf979ce128567065f196cd6db72674ceb2
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index_put.h
@@ -0,0 +1,31 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/util/OptionalArrayRef.h>
+
+
+
+#include <ATen/ops/_unsafe_index_put_ops.h>
+
+namespace at {
+
+
+// aten::_unsafe_index_put(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor
+inline at::Tensor _unsafe_index_put(const at::Tensor & self, const c10::List<::std::optional<at::Tensor>> & indices, const at::Tensor & values, bool accumulate=false) {
+    return at::_ops::_unsafe_index_put::call(self, indices, values, accumulate);
+}
+
+}
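A matching sketch for the put variant (illustrative values; like `at::index_put` minus bounds checks, and `accumulate=true` adds into existing values instead of overwriting them):

#include <ATen/ATen.h>

int main() {
  at::Tensor self = at::zeros({4});
  c10::List<::std::optional<at::Tensor>> indices;
  indices.push_back(::std::optional<at::Tensor>(at::arange(2)));
  at::Tensor values = at::ones({2});
  // result[0] and result[1] become 1.0; the rest stay 0.0.
  at::Tensor result = at::_unsafe_index_put(self, indices, values, /*accumulate=*/true);
  return 0;
}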
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index_put_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index_put_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..812253acc520b7247cacdf92067ba1ed675fed67
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index_put_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API at::Tensor _unsafe_index_put(const at::Tensor & self, const c10::List<::std::optional<at::Tensor>> & indices, const at::Tensor & values, bool accumulate=false);
+
+} // namespace compositeexplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index_put_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index_put_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..824aad6f0ab4c8a09ccdf06875cfb41e02f608a2
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index_put_native.h
@@ -0,0 +1,21 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor _unsafe_index_put(const at::Tensor & self, const c10::List<::std::optional<at::Tensor>> & indices, const at::Tensor & values, bool accumulate=false);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index_put_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index_put_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..7dc62b49ac57def242065fb78ac2fc316d22ec0e
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_index_put_ops.h
@@ -0,0 +1,29 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <c10/core/SymInt.h>
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _unsafe_index_put {
+  using schema = at::Tensor (const at::Tensor &, const c10::List<::std::optional<at::Tensor>> &, const at::Tensor &, bool);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_unsafe_index_put";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_unsafe_index_put(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor";
+  static at::Tensor call(const at::Tensor & self, const c10::List<::std::optional<at::Tensor>> & indices, const at::Tensor & values, bool accumulate);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const c10::List<::std::optional<at::Tensor>> & indices, const at::Tensor & values, bool accumulate);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index.h
new file mode 100644
index 0000000000000000000000000000000000000000..cf7f030817827073903de5a82e5221cbbeec13ea
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index.h
@@ -0,0 +1,31 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/util/OptionalArrayRef.h>
+
+
+
+#include <ATen/ops/_unsafe_masked_index_ops.h>
+
+namespace at {
+
+
+// aten::_unsafe_masked_index(Tensor self, Tensor mask, Tensor?[] indices, Scalar fill) -> Tensor
+inline at::Tensor _unsafe_masked_index(const at::Tensor & self, const at::Tensor & mask, const c10::List<::std::optional<at::Tensor>> & indices, const at::Scalar & fill) {
+    return at::_ops::_unsafe_masked_index::call(self, mask, indices, fill);
+}
+
+}
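Reading the schema above: where `mask` is true the result behaves like `self[indices]`, and where it is false the output lane takes `fill` instead of touching memory. A minimal sketch under those assumptions (illustrative values):

#include <ATen/ATen.h>

int main() {
  at::Tensor self = at::arange(4, at::dtype(at::kFloat));
  at::Tensor idx = at::arange(6);          // 0..5; 4 and 5 exceed self
  at::Tensor mask = idx < self.numel();    // masks off the out-of-range lanes
  c10::List<::std::optional<at::Tensor>> indices;
  indices.push_back(::std::optional<at::Tensor>(idx));
  // Masked lanes come back as the fill value; their indices are never read.
  at::Tensor r = at::_unsafe_masked_index(self, mask, indices, /*fill=*/0.0);
  return 0;
}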
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..02f1a3cee61954bb39d1abe9dace12d8a23f8046
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API at::Tensor _unsafe_masked_index(const at::Tensor & self, const at::Tensor & mask, const c10::List<::std::optional<at::Tensor>> & indices, const at::Scalar & fill);
+
+} // namespace compositeexplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..3201eafe6b86b5e3d9e599333ac34b4b7bb78e83
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index_native.h
@@ -0,0 +1,21 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor _unsafe_masked_index(const at::Tensor & self, const at::Tensor & mask, const c10::List<::std::optional<at::Tensor>> & indices, const at::Scalar & fill);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..8093fa0fadac22e574acdd45b81f56246ddc1835
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index_ops.h
@@ -0,0 +1,29 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <c10/core/SymInt.h>
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _unsafe_masked_index {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const c10::List<::std::optional<at::Tensor>> &, const at::Scalar &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_unsafe_masked_index";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_unsafe_masked_index(Tensor self, Tensor mask, Tensor?[] indices, Scalar fill) -> Tensor";
+  static at::Tensor call(const at::Tensor & self, const at::Tensor & mask, const c10::List<::std::optional<at::Tensor>> & indices, const at::Scalar & fill);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & mask, const c10::List<::std::optional<at::Tensor>> & indices, const at::Scalar & fill);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index_put_accumulate.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index_put_accumulate.h
new file mode 100644
index 0000000000000000000000000000000000000000..142849c730e426fd161d7f149fd6018884d0120a
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index_put_accumulate.h
@@ -0,0 +1,31 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/util/OptionalArrayRef.h>
+
+
+
+#include <ATen/ops/_unsafe_masked_index_put_accumulate_ops.h>
+
+namespace at {
+
+
+// aten::_unsafe_masked_index_put_accumulate(Tensor self, Tensor mask, Tensor?[] indices, Tensor values) -> Tensor
+inline at::Tensor _unsafe_masked_index_put_accumulate(const at::Tensor & self, const at::Tensor & mask, const c10::List<::std::optional<at::Tensor>> & indices, const at::Tensor & values) {
+    return at::_ops::_unsafe_masked_index_put_accumulate::call(self, mask, indices, values);
+}
+
+}
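The accumulate counterpart scatters `values` into a copy of `self` at `indices`, with masked-off lanes contributing nothing. A short sketch on the same assumptions as above:

#include <ATen/ATen.h>

int main() {
  at::Tensor acc = at::zeros({4});
  at::Tensor idx = at::arange(6) % 4;            // several lanes hit index 0..1 twice
  at::Tensor mask = at::ones({6}, at::kBool);    // all lanes active here
  c10::List<::std::optional<at::Tensor>> indices;
  indices.push_back(::std::optional<at::Tensor>(idx));
  at::Tensor values = at::ones({6});
  // Accumulating masked scatter: duplicate indices add up.
  at::Tensor r = at::_unsafe_masked_index_put_accumulate(acc, mask, indices, values);
  return 0;
}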
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index_put_accumulate_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index_put_accumulate_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..31548eca53ef952036d90bff32159de2b191256a
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index_put_accumulate_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,23 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API at::Tensor _unsafe_masked_index_put_accumulate(const at::Tensor & self, const at::Tensor & mask, const c10::List<::std::optional<at::Tensor>> & indices, const at::Tensor & values);
+
+} // namespace compositeexplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index_put_accumulate_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index_put_accumulate_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..f1bcda308eeca3597fe655db857a985e36177016
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index_put_accumulate_native.h
@@ -0,0 +1,21 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor _unsafe_masked_index_put_accumulate(const at::Tensor & self, const at::Tensor & mask, const c10::List<::std::optional<at::Tensor>> & indices, const at::Tensor & values);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index_put_accumulate_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index_put_accumulate_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..0037f598a9accce6bf61a7b5a8668e449fd7ec95
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_masked_index_put_accumulate_ops.h
@@ -0,0 +1,29 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <c10/core/SymInt.h>
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _unsafe_masked_index_put_accumulate {
+  using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const c10::List<::std::optional<at::Tensor>> &, const at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_unsafe_masked_index_put_accumulate";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_unsafe_masked_index_put_accumulate(Tensor self, Tensor mask, Tensor?[] indices, Tensor values) -> Tensor";
+  static at::Tensor call(const at::Tensor & self, const at::Tensor & mask, const c10::List<::std::optional<at::Tensor>> & indices, const at::Tensor & values);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & mask, const c10::List<::std::optional<at::Tensor>> & indices, const at::Tensor & values);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_view.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_view.h
new file mode 100644
index 0000000000000000000000000000000000000000..f6a3e016192530fa012d3709a1357ae3147d1c54
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_view.h
@@ -0,0 +1,92 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/util/OptionalArrayRef.h>
+
+
+
+#include <ATen/ops/_unsafe_view_ops.h>
+
+namespace at {
+
+
+// aten::_unsafe_view(Tensor self, SymInt[] size) -> Tensor
+inline at::Tensor _unsafe_view(const at::Tensor & self, at::IntArrayRef size) {
+    return at::_ops::_unsafe_view::call(self, c10::fromIntArrayRefSlow(size));
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor _unsafe_view(const at::Tensor & self, at::IntArrayRef size) {
+    return at::_ops::_unsafe_view::call(self, c10::fromIntArrayRefSlow(size));
+  }
+}
+
+// aten::_unsafe_view(Tensor self, SymInt[] size) -> Tensor
+inline at::Tensor _unsafe_view_symint(const at::Tensor & self, c10::SymIntArrayRef size) {
+    return at::_ops::_unsafe_view::call(self, size);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor _unsafe_view(const at::Tensor & self, c10::SymIntArrayRef size) {
+    return at::_ops::_unsafe_view::call(self, size);
+  }
+}
+
+// aten::_unsafe_view.out(Tensor self, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _unsafe_view_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef size) {
+    return at::_ops::_unsafe_view_out::call(self, c10::fromIntArrayRefSlow(size), out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor & _unsafe_view_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef size) {
+    return at::_ops::_unsafe_view_out::call(self, c10::fromIntArrayRefSlow(size), out);
+  }
+}
+
+// aten::_unsafe_view.out(Tensor self, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _unsafe_view_outf(const at::Tensor & self, at::IntArrayRef size, at::Tensor & out) {
+    return at::_ops::_unsafe_view_out::call(self, c10::fromIntArrayRefSlow(size), out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor & _unsafe_view_outf(const at::Tensor & self, at::IntArrayRef size, at::Tensor & out) {
+    return at::_ops::_unsafe_view_out::call(self, c10::fromIntArrayRefSlow(size), out);
+  }
+}
+
+// aten::_unsafe_view.out(Tensor self, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _unsafe_view_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef size) {
+    return at::_ops::_unsafe_view_out::call(self, size, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor & _unsafe_view_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef size) {
+    return at::_ops::_unsafe_view_out::call(self, size, out);
+  }
+}
+
+// aten::_unsafe_view.out(Tensor self, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _unsafe_view_symint_outf(const at::Tensor & self, c10::SymIntArrayRef size, at::Tensor & out) {
+    return at::_ops::_unsafe_view_out::call(self, size, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor & _unsafe_view_outf(const at::Tensor & self, c10::SymIntArrayRef size, at::Tensor & out) {
+    return at::_ops::_unsafe_view_out::call(self, size, out);
+  }
+}
+
+}
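Each overload above comes in a plain `IntArrayRef` flavor and a `SymInt` flavor; the `at::symint::` templates let generated code pick one via the index type. A minimal sketch of the plain path (illustrative; `_unsafe_view` reshapes like `view()` but autograd treats the result as a fresh tensor rather than an alias, hence "unsafe"):

#include <ATen/ATen.h>

int main() {
  at::Tensor t = at::randn({2, 3});
  // Same data, new shape; the SymInt overload (_unsafe_view_symint) is the
  // one traced graphs use to carry symbolic sizes.
  at::Tensor flat = at::_unsafe_view(t, {6});
  return 0;
}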
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_view_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_view_compositeexplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..46b9b3a1e82ba0a3179c942f76b655419c5d402f
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_view_compositeexplicitautograd_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautograd {
+
+TORCH_API at::Tensor _unsafe_view(const at::Tensor & self, at::IntArrayRef size);
+TORCH_API at::Tensor _unsafe_view_symint(const at::Tensor & self, c10::SymIntArrayRef size);
+TORCH_API at::Tensor & _unsafe_view_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef size);
+TORCH_API at::Tensor & _unsafe_view_outf(const at::Tensor & self, at::IntArrayRef size, at::Tensor & out);
+TORCH_API at::Tensor & _unsafe_view_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef size);
+TORCH_API at::Tensor & _unsafe_view_symint_outf(const at::Tensor & self, c10::SymIntArrayRef size, at::Tensor & out);
+
+} // namespace compositeexplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_view_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_view_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..29b3dc2d7edbd5b4420c03722b3fbc2f5ff35258
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_view_native.h
@@ -0,0 +1,22 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor _unsafe_view(const at::Tensor & self, at::IntArrayRef size);
+TORCH_API at::Tensor & _unsafe_view_out_symint(const at::Tensor & self, c10::SymIntArrayRef size, at::Tensor & out);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_view_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_view_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..700098f041ce1ab339e578ff604c39a4e154d0fa
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_unsafe_view_ops.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <c10/core/SymInt.h>
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _unsafe_view {
+  using schema = at::Tensor (const at::Tensor &, c10::SymIntArrayRef);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_unsafe_view";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_unsafe_view(Tensor self, SymInt[] size) -> Tensor";
+  static at::Tensor call(const at::Tensor & self, c10::SymIntArrayRef size);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef size);
+};
+
+struct TORCH_API _unsafe_view_out {
+  using schema = at::Tensor & (const at::Tensor &, c10::SymIntArrayRef, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_unsafe_view";
+  static constexpr const char* overload_name = "out";
+  static constexpr const char* schema_str = "_unsafe_view.out(Tensor self, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)";
+  static at::Tensor & call(const at::Tensor & self, c10::SymIntArrayRef size, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef size, at::Tensor & out);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa.h
new file mode 100644
index 0000000000000000000000000000000000000000..9bab1972ba37095bc24bfb11b011a04453322b73
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa.h
@@ -0,0 +1,114 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/util/OptionalArrayRef.h>
+
+
+
+#include <ATen/ops/_upsample_bicubic2d_aa_ops.h>
+
+namespace at {
+
+
+// aten::_upsample_bicubic2d_aa.vec(Tensor input, SymInt[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
+inline at::Tensor _upsample_bicubic2d_aa(const at::Tensor & input, at::OptionalIntArrayRef output_size, bool align_corners, ::std::optional<at::ArrayRef<double>> scale_factors) {
+    return at::_ops::_upsample_bicubic2d_aa_vec::call(input, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt, align_corners, scale_factors);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor _upsample_bicubic2d_aa(const at::Tensor & input, at::OptionalIntArrayRef output_size, bool align_corners, ::std::optional<at::ArrayRef<double>> scale_factors) {
+    return at::_ops::_upsample_bicubic2d_aa_vec::call(input, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt, align_corners, scale_factors);
+  }
+}
+
+// aten::_upsample_bicubic2d_aa.vec(Tensor input, SymInt[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
+inline at::Tensor _upsample_bicubic2d_aa_symint(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, bool align_corners, ::std::optional<at::ArrayRef<double>> scale_factors) {
+    return at::_ops::_upsample_bicubic2d_aa_vec::call(input, output_size, align_corners, scale_factors);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor _upsample_bicubic2d_aa(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, bool align_corners, ::std::optional<at::ArrayRef<double>> scale_factors) {
+    return at::_ops::_upsample_bicubic2d_aa_vec::call(input, output_size, align_corners, scale_factors);
+  }
+}
+
+// aten::_upsample_bicubic2d_aa.out(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _upsample_bicubic2d_aa_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_bicubic2d_aa_out::call(self, c10::fromIntArrayRefSlow(output_size), align_corners, scales_h, scales_w, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor & _upsample_bicubic2d_aa_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_bicubic2d_aa_out::call(self, c10::fromIntArrayRefSlow(output_size), align_corners, scales_h, scales_w, out);
+  }
+}
+
+// aten::_upsample_bicubic2d_aa.out(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _upsample_bicubic2d_aa_outf(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out) {
+    return at::_ops::_upsample_bicubic2d_aa_out::call(self, c10::fromIntArrayRefSlow(output_size), align_corners, scales_h, scales_w, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor & _upsample_bicubic2d_aa_outf(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out) {
+    return at::_ops::_upsample_bicubic2d_aa_out::call(self, c10::fromIntArrayRefSlow(output_size), align_corners, scales_h, scales_w, out);
+  }
+}
+
+// aten::_upsample_bicubic2d_aa.out(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _upsample_bicubic2d_aa_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_bicubic2d_aa_out::call(self, output_size, align_corners, scales_h, scales_w, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor & _upsample_bicubic2d_aa_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_bicubic2d_aa_out::call(self, output_size, align_corners, scales_h, scales_w, out);
+  }
+}
+
+// aten::_upsample_bicubic2d_aa.out(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _upsample_bicubic2d_aa_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out) {
+    return at::_ops::_upsample_bicubic2d_aa_out::call(self, output_size, align_corners, scales_h, scales_w, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor & _upsample_bicubic2d_aa_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out) {
+    return at::_ops::_upsample_bicubic2d_aa_out::call(self, output_size, align_corners, scales_h, scales_w, out);
+  }
+}
+
+// aten::_upsample_bicubic2d_aa(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
+inline at::Tensor _upsample_bicubic2d_aa(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_bicubic2d_aa::call(self, c10::fromIntArrayRefSlow(output_size), align_corners, scales_h, scales_w);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor _upsample_bicubic2d_aa(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_bicubic2d_aa::call(self, c10::fromIntArrayRefSlow(output_size), align_corners, scales_h, scales_w);
+  }
+}
+
+// aten::_upsample_bicubic2d_aa(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
+inline at::Tensor _upsample_bicubic2d_aa_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_bicubic2d_aa::call(self, output_size, align_corners, scales_h, scales_w);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor _upsample_bicubic2d_aa(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_bicubic2d_aa::call(self, output_size, align_corners, scales_h, scales_w);
+  }
+}
+
+}
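A minimal sketch of the functional overload just above (illustrative shapes; this anti-aliased bicubic kernel is what backs torch.nn.functional.interpolate with mode="bicubic" and antialias=True):

#include <ATen/ATen.h>

int main() {
  at::Tensor img = at::randn({1, 3, 32, 32});
  // Downsample NCHW to 16x16; scales_h/scales_w keep their None defaults.
  at::Tensor small = at::_upsample_bicubic2d_aa(img, {16, 16}, /*align_corners=*/false);
  return 0;
}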
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward.h
new file mode 100644
index 0000000000000000000000000000000000000000..4fa4d362ebef10f6068bccd7b95ada1622c851c5
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward.h
@@ -0,0 +1,92 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/util/OptionalArrayRef.h>
+
+
+
+#include <ATen/ops/_upsample_bicubic2d_aa_backward_ops.h>
+
+namespace at {
+
+
+// aten::_upsample_bicubic2d_aa_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & _upsample_bicubic2d_aa_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_bicubic2d_aa_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), align_corners, scales_h, scales_w, grad_input);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor & _upsample_bicubic2d_aa_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_bicubic2d_aa_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), align_corners, scales_h, scales_w, grad_input);
+  }
+}
+
+// aten::_upsample_bicubic2d_aa_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & _upsample_bicubic2d_aa_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input) {
+    return at::_ops::_upsample_bicubic2d_aa_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), align_corners, scales_h, scales_w, grad_input);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor & _upsample_bicubic2d_aa_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input) {
+    return at::_ops::_upsample_bicubic2d_aa_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), align_corners, scales_h, scales_w, grad_input);
+  }
+}
+
+// aten::_upsample_bicubic2d_aa_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & _upsample_bicubic2d_aa_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_bicubic2d_aa_backward_grad_input::call(grad_output, output_size, input_size, align_corners, scales_h, scales_w, grad_input);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor & _upsample_bicubic2d_aa_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_bicubic2d_aa_backward_grad_input::call(grad_output, output_size, input_size, align_corners, scales_h, scales_w, grad_input);
+  }
+}
+
+// aten::_upsample_bicubic2d_aa_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & _upsample_bicubic2d_aa_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input) {
+    return at::_ops::_upsample_bicubic2d_aa_backward_grad_input::call(grad_output, output_size, input_size, align_corners, scales_h, scales_w, grad_input);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor & _upsample_bicubic2d_aa_backward_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input) {
+    return at::_ops::_upsample_bicubic2d_aa_backward_grad_input::call(grad_output, output_size, input_size, align_corners, scales_h, scales_w, grad_input);
+  }
+}
+
+// aten::_upsample_bicubic2d_aa_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
+inline at::Tensor _upsample_bicubic2d_aa_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_bicubic2d_aa_backward::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), align_corners, scales_h, scales_w);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor _upsample_bicubic2d_aa_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_bicubic2d_aa_backward::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), align_corners, scales_h, scales_w);
+  }
+}
+
+// aten::_upsample_bicubic2d_aa_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
+inline at::Tensor _upsample_bicubic2d_aa_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_bicubic2d_aa_backward::call(grad_output, output_size, input_size, align_corners, scales_h, scales_w);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor _upsample_bicubic2d_aa_backward(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_bicubic2d_aa_backward::call(grad_output, output_size, input_size, align_corners, scales_h, scales_w);
+  }
+}
+
+}
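Autograd normally invokes this for the forward call sketched earlier, but it is callable directly; `input_size` must name the shape of the original forward input so the gradient can be scattered back. A minimal sketch (illustrative shapes matching the earlier 32x32 -> 16x16 resize):

#include <ATen/ATen.h>

int main() {
  at::Tensor grad_out = at::randn({1, 3, 16, 16});
  at::Tensor grad_in = at::_upsample_bicubic2d_aa_backward(
      grad_out, /*output_size=*/{16, 16}, /*input_size=*/{1, 3, 32, 32},
      /*align_corners=*/false);
  // grad_in has the forward input's shape: {1, 3, 32, 32}.
  return 0;
}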
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_compositeexplicitautogradnonfunctional_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_compositeexplicitautogradnonfunctional_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..ef674805c05320ba7b3565003cad5451ef81dbde
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_compositeexplicitautogradnonfunctional_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace compositeexplicitautogradnonfunctional {
+
+TORCH_API at::Tensor _upsample_bicubic2d_aa_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor _upsample_bicubic2d_aa_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_cpu_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..8dc332c1bd1debf6696f373f4ab5337a6ee67e28
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_cpu_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cpu {
+
+TORCH_API at::Tensor _upsample_bicubic2d_aa_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor _upsample_bicubic2d_aa_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_bicubic2d_aa_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_bicubic2d_aa_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input);
+TORCH_API at::Tensor & _upsample_bicubic2d_aa_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_bicubic2d_aa_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input);
+
+} // namespace cpu
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..c4e2f3f85a5990c8f068ba334988d19ab2cac837
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_cuda_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor _upsample_bicubic2d_aa_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor _upsample_bicubic2d_aa_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_bicubic2d_aa_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_bicubic2d_aa_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input);
+TORCH_API at::Tensor & _upsample_bicubic2d_aa_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_bicubic2d_aa_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input);
+
+} // namespace cuda
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_meta.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_meta.h
new file mode 100644
index 0000000000000000000000000000000000000000..80677ae1290a57b9e5a69dfe598a78de3ada8758
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_meta.h
@@ -0,0 +1,27 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeMetaFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/TensorIterator.h>
+#include <ATen/TensorMeta.h>
+#include <tuple>
+#include <vector>
+
+namespace at {
+namespace meta {
+
+struct TORCH_API structured__upsample_bicubic2d_aa_backward : public at::impl::MetaBase {
+
+
+    void meta(const at::Tensor & grad_output, at::ArrayRef<c10::SymInt> output_size, at::ArrayRef<c10::SymInt> input_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w);
+};
+
+} // namespace meta
+} // namespace at
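The meta()/impl() split in the header above is the structured-kernels pattern: meta() checks arguments and declares the output's shape, then the generated out/functional wrappers allocate storage and call impl(). A schematic of that contract with plain types standing in for at::Tensor (illustrative only; the real machinery is the TORCH_IMPL_FUNC registration in aten/src/ATen/native):

#include <cstdio>
#include <vector>

// Stand-in for the meta half: shape logic only, no data touched.
struct Meta {
  std::vector<long> out_shape;
  void meta(const std::vector<long>& input_size) { out_shape = input_size; }
};

// Stand-in for the backend half: receives preallocated output, fills it.
struct Kernel : Meta {
  void impl(std::vector<float>& grad_input) {
    for (auto& v : grad_input) v = 0.f;  // placeholder for the real math
  }
};

int main() {
  Kernel k;
  k.meta({1, 3, 32, 32});                      // 1) decide the shape
  std::vector<float> grad_input(1 * 3 * 32 * 32);  // 2) wrapper allocates
  k.impl(grad_input);                          // 3) kernel fills it in
  std::printf("rank %zu\n", k.out_shape.size());
  return 0;
}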
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_meta_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_meta_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..cde90ae07b031ae5c8409270d6c0ed112c22e637
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_meta_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+
+namespace meta {
+
+TORCH_API at::Tensor _upsample_bicubic2d_aa_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor _upsample_bicubic2d_aa_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_bicubic2d_aa_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_bicubic2d_aa_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input);
+TORCH_API at::Tensor & _upsample_bicubic2d_aa_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_bicubic2d_aa_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input);
+
+} // namespace meta
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..e847820977efca0d7d85216f95d69ddf0c2b57e7
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_native.h
@@ -0,0 +1,26 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+#include <ATen/ops/_upsample_bicubic2d_aa_backward_meta.h>
+
+namespace at {
+namespace native {
+struct TORCH_API structured__upsample_bicubic2d_aa_backward_out_cpu : public at::meta::structured__upsample_bicubic2d_aa_backward {
+void impl(const at::Tensor & grad_output, at::ArrayRef<c10::SymInt> output_size, at::ArrayRef<c10::SymInt> input_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, const at::Tensor & grad_input);
+};
structured__upsample_bicubic2d_aa_backward_out_cuda : public at::meta::structured__upsample_bicubic2d_aa_backward { +void impl(const at::Tensor & grad_output, at::ArrayRef output_size, at::ArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, const at::Tensor & grad_input); +}; +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..4b771eb139f7eb92dea22549d4f56b39a99b0355 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _upsample_bicubic2d_aa_backward_grad_input { + using schema = at::Tensor & (const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, bool, ::std::optional, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_upsample_bicubic2d_aa_backward"; + static constexpr const char* overload_name = "grad_input"; + static constexpr const char* schema_str = "_upsample_bicubic2d_aa_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & grad_input); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & grad_input); +}; + +struct TORCH_API _upsample_bicubic2d_aa_backward { + using schema = at::Tensor (const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, bool, ::std::optional, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_upsample_bicubic2d_aa_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_upsample_bicubic2d_aa_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? 
scales_w=None) -> Tensor"; + static at::Tensor call(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_compositeexplicitautogradnonfunctional_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b061fd185931939945577cc0b3718930c2120b35 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor _upsample_bicubic2d_aa(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor _upsample_bicubic2d_aa_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..bbc8986751ebc4018ac8f0486a84cc712beb69bc --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_compositeimplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
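// Editorial usage sketch (not part of the generated headers; tensor values
// and sizes are illustrative). The at::_ops structs declared in the *_ops.h
// files above each pin one dispatcher entry through their constexpr
// name/overload_name/schema_str members, and expose typed entry points:
// call() dispatches from scratch, redispatch() resumes with an explicit
// c10::DispatchKeySet. A direct unboxed call mirrors the generated wrappers:
#include <ATen/ATen.h>
#include <ATen/Operators.h>
inline at::Tensor example_bicubic_aa_grad_input(const at::Tensor & grad_output) {
  return at::_ops::_upsample_bicubic2d_aa_backward::call(
      grad_output,
      c10::fromIntArrayRefSlow({8, 8}),        // output_size: SymInt[2]
      c10::fromIntArrayRefSlow({1, 3, 4, 4}),  // input_size:  SymInt[4]
      /*align_corners=*/false,
      ::std::nullopt, ::std::nullopt);         // scales_h, scales_w
}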
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _upsample_bicubic2d_aa(const at::Tensor & input, at::OptionalIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors); +TORCH_API at::Tensor _upsample_bicubic2d_aa_symint(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..1de390c227a96e438b1238baf2edaddb6354199e --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_cpu_dispatch.h @@ -0,0 +1,28 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _upsample_bicubic2d_aa(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor _upsample_bicubic2d_aa_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bicubic2d_aa_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bicubic2d_aa_outf(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out); +TORCH_API at::Tensor & _upsample_bicubic2d_aa_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bicubic2d_aa_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f71a32a12a34994b3f2b109cc9c9be59b4d2082e --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_cuda_dispatch.h @@ -0,0 +1,28 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _upsample_bicubic2d_aa(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt); +TORCH_API at::Tensor _upsample_bicubic2d_aa_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bicubic2d_aa_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bicubic2d_aa_outf(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out); +TORCH_API at::Tensor & _upsample_bicubic2d_aa_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bicubic2d_aa_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_meta.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..7c0c60428f3ef551d3248619645c8003a7d0e481 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_meta.h @@ -0,0 +1,27 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured__upsample_bicubic2d_aa : public at::impl::MetaBase { + + + void meta(const at::Tensor & self, at::ArrayRef<int64_t> output_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w); +}; + +} // namespace meta +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_meta_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..470d76c2124682a70a523ccc03301d8f850c6a71 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_meta_dispatch.h @@ -0,0 +1,28 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class.
+#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor _upsample_bicubic2d_aa(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor _upsample_bicubic2d_aa_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bicubic2d_aa_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bicubic2d_aa_outf(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out); +TORCH_API at::Tensor & _upsample_bicubic2d_aa_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bicubic2d_aa_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out); + +} // namespace meta +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_native.h new file mode 100644 index 0000000000000000000000000000000000000000..85bc6369831075b084fee75e7512a7f2c784204e --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_native.h @@ -0,0 +1,27 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +TORCH_API at::Tensor _upsample_bicubic2d_aa(const at::Tensor & input, at::OptionalIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors); +struct TORCH_API structured__upsample_bicubic2d_aa_out_cpu : public at::meta::structured__upsample_bicubic2d_aa { +void impl(const at::Tensor & self, at::ArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, const at::Tensor & out); +}; +struct TORCH_API structured__upsample_bicubic2d_aa_out_cuda : public at::meta::structured__upsample_bicubic2d_aa { +void impl(const at::Tensor & self, at::ArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, const at::Tensor & out); +}; +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..86d0aa938376774be3ebf35eb6cf8d8cf60d469c --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_ops.h @@ -0,0 +1,51 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
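// Editorial sketch of the structured-kernel contract implied by the
// *_meta.h / *_native.h pair above: the declared meta() conventionally
// validates shapes and allocates the output, and the backend impl() fills
// the pre-allocated result. A hypothetical shape check in the spirit of the
// upsample meta functions (function name and messages are illustrative):
#include <c10/util/ArrayRef.h>
#include <c10/util/Exception.h>
inline void example_upsample2d_shape_checks(c10::ArrayRef<int64_t> output_size,
                                            c10::ArrayRef<int64_t> input_size) {
  TORCH_CHECK(output_size.size() == 2,
              "expected output_size to have 2 elements, got ", output_size.size());
  TORCH_CHECK(input_size.size() == 4,
              "expected input_size to have 4 elements (N, C, H, W), got ",
              input_size.size());
  // A real meta() would follow this with a set_output_raw_strided(0, ...)
  // call sized {input_size[0], input_size[1], output_size[0], output_size[1]}.
}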
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _upsample_bicubic2d_aa_vec { + using schema = at::Tensor (const at::Tensor &, at::OptionalSymIntArrayRef, bool, ::std::optional>); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_upsample_bicubic2d_aa"; + static constexpr const char* overload_name = "vec"; + static constexpr const char* schema_str = "_upsample_bicubic2d_aa.vec(Tensor input, SymInt[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor"; + static at::Tensor call(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, at::OptionalSymIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors); +}; + +struct TORCH_API _upsample_bicubic2d_aa_out { + using schema = at::Tensor & (const at::Tensor &, c10::SymIntArrayRef, bool, ::std::optional, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_upsample_bicubic2d_aa"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_upsample_bicubic2d_aa.out(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out); +}; + +struct TORCH_API _upsample_bicubic2d_aa { + using schema = at::Tensor (const at::Tensor &, c10::SymIntArrayRef, bool, ::std::optional, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_upsample_bicubic2d_aa"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_upsample_bicubic2d_aa(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa.h new file mode 100644 index 0000000000000000000000000000000000000000..d130646ad29273e551402b50814022e539b09c07 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa.h @@ -0,0 +1,114 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_upsample_bilinear2d_aa.vec(Tensor input, SymInt[]? 
output_size, bool align_corners, float[]? scale_factors) -> Tensor +inline at::Tensor _upsample_bilinear2d_aa(const at::Tensor & input, at::OptionalIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors) { + return at::_ops::_upsample_bilinear2d_aa_vec::call(input, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt, align_corners, scale_factors); +} +namespace symint { + template >> + at::Tensor _upsample_bilinear2d_aa(const at::Tensor & input, at::OptionalIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors) { + return at::_ops::_upsample_bilinear2d_aa_vec::call(input, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt, align_corners, scale_factors); + } +} + +// aten::_upsample_bilinear2d_aa.vec(Tensor input, SymInt[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor +inline at::Tensor _upsample_bilinear2d_aa_symint(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors) { + return at::_ops::_upsample_bilinear2d_aa_vec::call(input, output_size, align_corners, scale_factors); +} +namespace symint { + template >> + at::Tensor _upsample_bilinear2d_aa(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors) { + return at::_ops::_upsample_bilinear2d_aa_vec::call(input, output_size, align_corners, scale_factors); + } +} + +// aten::_upsample_bilinear2d_aa.out(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _upsample_bilinear2d_aa_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_bilinear2d_aa_out::call(self, c10::fromIntArrayRefSlow(output_size), align_corners, scales_h, scales_w, out); +} +namespace symint { + template >> + at::Tensor & _upsample_bilinear2d_aa_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_bilinear2d_aa_out::call(self, c10::fromIntArrayRefSlow(output_size), align_corners, scales_h, scales_w, out); + } +} + +// aten::_upsample_bilinear2d_aa.out(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _upsample_bilinear2d_aa_outf(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out) { + return at::_ops::_upsample_bilinear2d_aa_out::call(self, c10::fromIntArrayRefSlow(output_size), align_corners, scales_h, scales_w, out); +} +namespace symint { + template >> + at::Tensor & _upsample_bilinear2d_aa_outf(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out) { + return at::_ops::_upsample_bilinear2d_aa_out::call(self, c10::fromIntArrayRefSlow(output_size), align_corners, scales_h, scales_w, out); + } +} + +// aten::_upsample_bilinear2d_aa.out(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & _upsample_bilinear2d_aa_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_bilinear2d_aa_out::call(self, output_size, align_corners, scales_h, scales_w, out); +} +namespace symint { + template >> + at::Tensor & _upsample_bilinear2d_aa_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_bilinear2d_aa_out::call(self, output_size, align_corners, scales_h, scales_w, out); + } +} + +// aten::_upsample_bilinear2d_aa.out(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _upsample_bilinear2d_aa_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out) { + return at::_ops::_upsample_bilinear2d_aa_out::call(self, output_size, align_corners, scales_h, scales_w, out); +} +namespace symint { + template >> + at::Tensor & _upsample_bilinear2d_aa_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out) { + return at::_ops::_upsample_bilinear2d_aa_out::call(self, output_size, align_corners, scales_h, scales_w, out); + } +} + +// aten::_upsample_bilinear2d_aa(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor +inline at::Tensor _upsample_bilinear2d_aa(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_bilinear2d_aa::call(self, c10::fromIntArrayRefSlow(output_size), align_corners, scales_h, scales_w); +} +namespace symint { + template >> + at::Tensor _upsample_bilinear2d_aa(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_bilinear2d_aa::call(self, c10::fromIntArrayRefSlow(output_size), align_corners, scales_h, scales_w); + } +} + +// aten::_upsample_bilinear2d_aa(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? 
scales_w=None) -> Tensor +inline at::Tensor _upsample_bilinear2d_aa_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_bilinear2d_aa::call(self, output_size, align_corners, scales_h, scales_w); +} +namespace symint { + template >> + at::Tensor _upsample_bilinear2d_aa(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_bilinear2d_aa::call(self, output_size, align_corners, scales_h, scales_w); + } +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..2c404b843a720b8282c86da5d604b4f1f663ed5d --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward.h @@ -0,0 +1,92 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_upsample_bilinear2d_aa_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!) +inline at::Tensor & _upsample_bilinear2d_aa_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_bilinear2d_aa_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), align_corners, scales_h, scales_w, grad_input); +} +namespace symint { + template >> + at::Tensor & _upsample_bilinear2d_aa_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_bilinear2d_aa_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), align_corners, scales_h, scales_w, grad_input); + } +} + +// aten::_upsample_bilinear2d_aa_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!) 
+inline at::Tensor & _upsample_bilinear2d_aa_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & grad_input) { + return at::_ops::_upsample_bilinear2d_aa_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), align_corners, scales_h, scales_w, grad_input); +} +namespace symint { + template >> + at::Tensor & _upsample_bilinear2d_aa_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & grad_input) { + return at::_ops::_upsample_bilinear2d_aa_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), align_corners, scales_h, scales_w, grad_input); + } +} + +// aten::_upsample_bilinear2d_aa_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!) +inline at::Tensor & _upsample_bilinear2d_aa_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_bilinear2d_aa_backward_grad_input::call(grad_output, output_size, input_size, align_corners, scales_h, scales_w, grad_input); +} +namespace symint { + template >> + at::Tensor & _upsample_bilinear2d_aa_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_bilinear2d_aa_backward_grad_input::call(grad_output, output_size, input_size, align_corners, scales_h, scales_w, grad_input); + } +} + +// aten::_upsample_bilinear2d_aa_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!) +inline at::Tensor & _upsample_bilinear2d_aa_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & grad_input) { + return at::_ops::_upsample_bilinear2d_aa_backward_grad_input::call(grad_output, output_size, input_size, align_corners, scales_h, scales_w, grad_input); +} +namespace symint { + template >> + at::Tensor & _upsample_bilinear2d_aa_backward_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & grad_input) { + return at::_ops::_upsample_bilinear2d_aa_backward_grad_input::call(grad_output, output_size, input_size, align_corners, scales_h, scales_w, grad_input); + } +} + +// aten::_upsample_bilinear2d_aa_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? 
scales_w=None) -> Tensor +inline at::Tensor _upsample_bilinear2d_aa_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_bilinear2d_aa_backward::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), align_corners, scales_h, scales_w); +} +namespace symint { + template >> + at::Tensor _upsample_bilinear2d_aa_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_bilinear2d_aa_backward::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), align_corners, scales_h, scales_w); + } +} + +// aten::_upsample_bilinear2d_aa_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor +inline at::Tensor _upsample_bilinear2d_aa_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_bilinear2d_aa_backward::call(grad_output, output_size, input_size, align_corners, scales_h, scales_w); +} +namespace symint { + template >> + at::Tensor _upsample_bilinear2d_aa_backward(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_bilinear2d_aa_backward::call(grad_output, output_size, input_size, align_corners, scales_h, scales_w); + } +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_compositeexplicitautogradnonfunctional_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b910470acbc2d16e4c24f76a029768fd3f5277c7 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
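// Editorial sketch (illustrative sizes): the Function.h wrappers above
// generate a dual API, as the bodies show. The plain overloads take concrete
// at::IntArrayRef sizes and convert them through c10::fromIntArrayRefSlow();
// the *_symint overloads accept c10::SymIntArrayRef directly so symbolic
// shapes pass through unconverted. The concrete-int entry point in use:
#include <ATen/ATen.h>
inline at::Tensor example_bilinear_aa_backward(const at::Tensor & grad_output) {
  // Concrete integer sizes; forwarded internally as SymInts.
  return at::_upsample_bilinear2d_aa_backward(
      grad_output,
      /*output_size=*/{16, 16},
      /*input_size=*/{1, 3, 8, 8},
      /*align_corners=*/false);
}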
+#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor _upsample_bilinear2d_aa_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor _upsample_bilinear2d_aa_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..26032ccb7b99cba32e0d7acd191a357848239714 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_cpu_dispatch.h @@ -0,0 +1,28 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _upsample_bilinear2d_aa_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor _upsample_bilinear2d_aa_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & grad_input); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & grad_input); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_cuda_dispatch.h new file mode 100644 index 
0000000000000000000000000000000000000000..d1c8d24e1c197be1d774f0c56054e87b097c9cb8 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_cuda_dispatch.h @@ -0,0 +1,28 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _upsample_bilinear2d_aa_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt); +TORCH_API at::Tensor _upsample_bilinear2d_aa_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_meta.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..3c6211c2c307ec047d09f01fcf0119b99bef6536 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_meta.h @@ -0,0 +1,27 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured__upsample_bilinear2d_aa_backward : public at::impl::MetaBase { + + + void meta(const at::Tensor & grad_output, at::ArrayRef<int64_t> output_size, at::ArrayRef<int64_t> input_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w); +}; + +} // namespace meta +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_meta_dispatch.h
b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..9c8e64d811810e8868ec66f9202e42c9fa3979d9 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_meta_dispatch.h @@ -0,0 +1,28 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor _upsample_bilinear2d_aa_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor _upsample_bilinear2d_aa_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & grad_input); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & grad_input); + +} // namespace meta +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..4a2cc3c9832726f987e8e815ad8893bf82e41b79 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_native.h @@ -0,0 +1,26 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured__upsample_bilinear2d_aa_backward_out_cpu : public at::meta::structured__upsample_bilinear2d_aa_backward { +void impl(const at::Tensor & grad_output, at::ArrayRef output_size, at::ArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, const at::Tensor & grad_input); +}; +struct TORCH_API 
structured__upsample_bilinear2d_aa_backward_out_cuda : public at::meta::structured__upsample_bilinear2d_aa_backward { +void impl(const at::Tensor & grad_output, at::ArrayRef output_size, at::ArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, const at::Tensor & grad_input); +}; +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..da658b22e53cffa9ce72c1fcf7d1f1fe3c9c43a3 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _upsample_bilinear2d_aa_backward_grad_input { + using schema = at::Tensor & (const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, bool, ::std::optional, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_upsample_bilinear2d_aa_backward"; + static constexpr const char* overload_name = "grad_input"; + static constexpr const char* schema_str = "_upsample_bilinear2d_aa_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & grad_input); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & grad_input); +}; + +struct TORCH_API _upsample_bilinear2d_aa_backward { + using schema = at::Tensor (const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, bool, ::std::optional, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_upsample_bilinear2d_aa_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_upsample_bilinear2d_aa_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? 
scales_w=None) -> Tensor"; + static at::Tensor call(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_compositeexplicitautogradnonfunctional_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..1cfd80190e7f7fec6cff5796116485b8a38f1bbc --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor _upsample_bilinear2d_aa(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor _upsample_bilinear2d_aa_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..92d5fc29521124b7308d58a07c10e1f8ff2e5ec1 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_compositeimplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
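// Editorial sketch (illustrative): the CompositeImplicitAutograd entries
// above correspond to the ".vec" overload of the schema, where exactly one
// of output_size / scale_factors is supplied and the op decomposes into the
// sized overload declared earlier. Scaling by a factor instead of giving an
// explicit size might look like:
#include <ATen/ATen.h>
inline at::Tensor example_bilinear_aa_scale_by_two(const at::Tensor & input) {
  const double scale_factors[] = {2.0, 2.0};  // (H, W) multipliers
  return at::_upsample_bilinear2d_aa(
      input,
      /*output_size=*/::std::nullopt,
      /*align_corners=*/false,
      c10::ArrayRef<double>(scale_factors));
}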
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _upsample_bilinear2d_aa(const at::Tensor & input, at::OptionalIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors); +TORCH_API at::Tensor _upsample_bilinear2d_aa_symint(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..eddc3702a565a4f9a976d7f115089444499f8f85 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_cpu_dispatch.h @@ -0,0 +1,28 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _upsample_bilinear2d_aa(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor _upsample_bilinear2d_aa_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_outf(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ea1eaed37552366e3d483cf8101cc67d33a08041 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_cuda_dispatch.h @@ -0,0 +1,28 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _upsample_bilinear2d_aa(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt); +TORCH_API at::Tensor _upsample_bilinear2d_aa_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_outf(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_bilinear2d_aa_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_meta.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..65227c31f8f66c8b53343d54132747f90ba3a28b --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_meta.h @@ -0,0 +1,27 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured__upsample_bilinear2d_aa : public at::impl::MetaBase { + + + void meta(const at::Tensor & self, at::ArrayRef<int64_t> output_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w); +}; + +} // namespace meta +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_meta_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8fdf866fd50a497d1e932a584763caf014c9cfe6 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_meta_dispatch.h @@ -0,0 +1,28 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class.
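// Editorial sketch (assumes the Meta backend is available in this build):
// the at::meta declarations in these headers back the Meta dispatch key,
// which computes output metadata without allocating or touching data. That
// makes cheap shape inference possible:
#include <ATen/ATen.h>
#include <vector>
inline std::vector<int64_t> example_infer_bilinear_aa_shape() {
  at::Tensor probe = at::empty({1, 3, 32, 32},
                               at::TensorOptions().device(c10::kMeta));
  at::Tensor out = at::_upsample_bilinear2d_aa(
      probe, /*output_size=*/{64, 64}, /*align_corners=*/false);
  return out.sizes().vec();  // {1, 3, 64, 64}; no real storage was created
}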
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_meta_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_meta_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..8fdf866fd50a497d1e932a584763caf014c9cfe6
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_meta_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+
+namespace meta {
+
+TORCH_API at::Tensor _upsample_bilinear2d_aa(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor _upsample_bilinear2d_aa_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_bilinear2d_aa_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_bilinear2d_aa_outf(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out);
+TORCH_API at::Tensor & _upsample_bilinear2d_aa_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_bilinear2d_aa_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out);
+
+} // namespace meta
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..9265b1d956e306d48478a2a3b43581de0477ee81
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_native.h
@@ -0,0 +1,27 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor _upsample_bilinear2d_aa(const at::Tensor & input, at::OptionalIntArrayRef output_size, bool align_corners, ::std::optional<at::ArrayRef<double>> scale_factors);
+struct TORCH_API structured__upsample_bilinear2d_aa_out_cpu : public at::meta::structured__upsample_bilinear2d_aa {
+void impl(const at::Tensor & self, at::ArrayRef<int64_t> output_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, const at::Tensor & out);
+};
+struct TORCH_API structured__upsample_bilinear2d_aa_out_cuda : public at::meta::structured__upsample_bilinear2d_aa {
+void impl(const at::Tensor & self, at::ArrayRef<int64_t> output_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, const at::Tensor & out);
+};
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..439599df89989da067564861ec84f2e7c9dcb443
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_ops.h
@@ -0,0 +1,51 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _upsample_bilinear2d_aa_vec {
+  using schema = at::Tensor (const at::Tensor &, at::OptionalSymIntArrayRef, bool, ::std::optional<at::ArrayRef<double>>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_upsample_bilinear2d_aa";
+  static constexpr const char* overload_name = "vec";
+  static constexpr const char* schema_str = "_upsample_bilinear2d_aa.vec(Tensor input, SymInt[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor";
+  static at::Tensor call(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, bool align_corners, ::std::optional<at::ArrayRef<double>> scale_factors);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, at::OptionalSymIntArrayRef output_size, bool align_corners, ::std::optional<at::ArrayRef<double>> scale_factors);
+};
+
+struct TORCH_API _upsample_bilinear2d_aa_out {
+  using schema = at::Tensor & (const at::Tensor &, c10::SymIntArrayRef, bool, ::std::optional<double>, ::std::optional<double>, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_upsample_bilinear2d_aa";
+  static constexpr const char* overload_name = "out";
+  static constexpr const char* schema_str = "_upsample_bilinear2d_aa.out(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)";
+  static at::Tensor & call(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out);
+};
+
+struct TORCH_API _upsample_bilinear2d_aa {
+  using schema = at::Tensor (const at::Tensor &, c10::SymIntArrayRef, bool, ::std::optional<double>, ::std::optional<double>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_upsample_bilinear2d_aa";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_upsample_bilinear2d_aa(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor";
+  static at::Tensor call(const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef output_size, bool align_corners, ::std::optional<double> scales_h, ::std::optional<double> scales_w);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d.h
new file mode 100644
index 0000000000000000000000000000000000000000..508bde27c1e6b62269b8e3b8a126c76d182bf8b3
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d.h
@@ -0,0 +1,114 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+
+#include
+
+namespace at {
+
+
+// aten::_upsample_nearest_exact1d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor
+inline at::Tensor _upsample_nearest_exact1d(const at::Tensor & input, at::OptionalIntArrayRef output_size, ::std::optional<at::ArrayRef<double>> scale_factors) {
+    return at::_ops::_upsample_nearest_exact1d_vec::call(input, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt, scale_factors);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor _upsample_nearest_exact1d(const at::Tensor & input, at::OptionalIntArrayRef output_size, ::std::optional<at::ArrayRef<double>> scale_factors) {
+    return at::_ops::_upsample_nearest_exact1d_vec::call(input, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt, scale_factors);
+  }
+}
+
+// aten::_upsample_nearest_exact1d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor
+inline at::Tensor _upsample_nearest_exact1d_symint(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, ::std::optional<at::ArrayRef<double>> scale_factors) {
+    return at::_ops::_upsample_nearest_exact1d_vec::call(input, output_size, scale_factors);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor _upsample_nearest_exact1d(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, ::std::optional<at::ArrayRef<double>> scale_factors) {
+    return at::_ops::_upsample_nearest_exact1d_vec::call(input, output_size, scale_factors);
+  }
+}
+
+// aten::_upsample_nearest_exact1d.out(Tensor self, SymInt[1] output_size, float? scales=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact1d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact1d_out::call(self, c10::fromIntArrayRefSlow(output_size), scales, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor & _upsample_nearest_exact1d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact1d_out::call(self, c10::fromIntArrayRefSlow(output_size), scales, out);
+  }
+}
+
+// aten::_upsample_nearest_exact1d.out(Tensor self, SymInt[1] output_size, float? scales=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact1d_outf(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales, at::Tensor & out) {
+    return at::_ops::_upsample_nearest_exact1d_out::call(self, c10::fromIntArrayRefSlow(output_size), scales, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor & _upsample_nearest_exact1d_outf(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales, at::Tensor & out) {
+    return at::_ops::_upsample_nearest_exact1d_out::call(self, c10::fromIntArrayRefSlow(output_size), scales, out);
+  }
+}
+
+// aten::_upsample_nearest_exact1d.out(Tensor self, SymInt[1] output_size, float? scales=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact1d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact1d_out::call(self, output_size, scales, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor & _upsample_nearest_exact1d_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact1d_out::call(self, output_size, scales, out);
+  }
+}
+
+// aten::_upsample_nearest_exact1d.out(Tensor self, SymInt[1] output_size, float? scales=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact1d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales, at::Tensor & out) {
+    return at::_ops::_upsample_nearest_exact1d_out::call(self, output_size, scales, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor & _upsample_nearest_exact1d_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales, at::Tensor & out) {
+    return at::_ops::_upsample_nearest_exact1d_out::call(self, output_size, scales, out);
+  }
+}
+
+// aten::_upsample_nearest_exact1d(Tensor self, SymInt[1] output_size, float? scales=None) -> Tensor
+inline at::Tensor _upsample_nearest_exact1d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact1d::call(self, c10::fromIntArrayRefSlow(output_size), scales);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor _upsample_nearest_exact1d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact1d::call(self, c10::fromIntArrayRefSlow(output_size), scales);
+  }
+}
+
+// aten::_upsample_nearest_exact1d(Tensor self, SymInt[1] output_size, float? scales=None) -> Tensor
+inline at::Tensor _upsample_nearest_exact1d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact1d::call(self, output_size, scales);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor _upsample_nearest_exact1d(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact1d::call(self, output_size, scales);
+  }
+}
+
+}
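// [editor's note] Every entry point above is generated twice: the at::IntArrayRef overloads
// eagerly convert concrete sizes via c10::fromIntArrayRefSlow(), while the *_symint overloads
// pass c10::SymIntArrayRef through untouched so symbolic shapes survive tracing/compilation.
// The at::symint:: templates merely pick one of the two via enable_if on int64_t vs c10::SymInt.
// A minimal usage sketch, assuming a built libtorch (sizes are hypothetical):
//
//   at::Tensor x = at::rand({2, 4, 8});
//   ::std::optional<double> scale;  // empty => derive the scale from output_size
//   at::Tensor y = at::_upsample_nearest_exact1d(x, at::IntArrayRef{16}, scale);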
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward.h
new file mode 100644
index 0000000000000000000000000000000000000000..ba472a26f31dd5eddbb2bee965615b50a00aed93
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward.h
@@ -0,0 +1,92 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+
+#include
+
+namespace at {
+
+
+// aten::_upsample_nearest_exact1d_backward.grad_input(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact1d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact1d_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales, grad_input);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor & _upsample_nearest_exact1d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact1d_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales, grad_input);
+  }
+}
+
+// aten::_upsample_nearest_exact1d_backward.grad_input(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact1d_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales, at::Tensor & grad_input) {
+    return at::_ops::_upsample_nearest_exact1d_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales, grad_input);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor & _upsample_nearest_exact1d_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales, at::Tensor & grad_input) {
+    return at::_ops::_upsample_nearest_exact1d_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales, grad_input);
+  }
+}
+
+// aten::_upsample_nearest_exact1d_backward.grad_input(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact1d_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact1d_backward_grad_input::call(grad_output, output_size, input_size, scales, grad_input);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor & _upsample_nearest_exact1d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact1d_backward_grad_input::call(grad_output, output_size, input_size, scales, grad_input);
+  }
+}
+
+// aten::_upsample_nearest_exact1d_backward.grad_input(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact1d_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales, at::Tensor & grad_input) {
+    return at::_ops::_upsample_nearest_exact1d_backward_grad_input::call(grad_output, output_size, input_size, scales, grad_input);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor & _upsample_nearest_exact1d_backward_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales, at::Tensor & grad_input) {
+    return at::_ops::_upsample_nearest_exact1d_backward_grad_input::call(grad_output, output_size, input_size, scales, grad_input);
+  }
+}
+
+// aten::_upsample_nearest_exact1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, float? scales=None) -> Tensor
+inline at::Tensor _upsample_nearest_exact1d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact1d_backward::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, int64_t>::value>>
+  at::Tensor _upsample_nearest_exact1d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact1d_backward::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales);
+  }
+}
+
+// aten::_upsample_nearest_exact1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, float? scales=None) -> Tensor
+inline at::Tensor _upsample_nearest_exact1d_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact1d_backward::call(grad_output, output_size, input_size, scales);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same<T, c10::SymInt>::value>>
+  at::Tensor _upsample_nearest_exact1d_backward(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact1d_backward::call(grad_output, output_size, input_size, scales);
+  }
+}
+
+}
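// [editor's note] The backward surface mirrors the forward one, with two details visible in
// the signatures: results land in grad_input (the *_out variants take it first, the *_outf
// variants take it last, and both funnel into the same grad_input overload), and input_size
// (SymInt[3], i.e. N, C, L) must be passed explicitly because the original length cannot be
// recovered from output_size once scales are optional. Hypothetical sketch:
//
//   at::Tensor go = at::rand({2, 4, 16});                  // upstream gradient
//   at::Tensor gi = at::empty({2, 4, 8}, go.options());    // pre-allocated grad_input
//   at::_upsample_nearest_exact1d_backward_out(gi, go, {16}, {2, 4, 8}, ::std::nullopt);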
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward_compositeexplicitautogradnonfunctional_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward_compositeexplicitautogradnonfunctional_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..03a438085ff74dd6c46e2222746b9538288df775
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward_compositeexplicitautogradnonfunctional_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+
+namespace compositeexplicitautogradnonfunctional {
+
+TORCH_API at::Tensor _upsample_nearest_exact1d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor _upsample_nearest_exact1d_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales=::std::nullopt);
+
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward_cpu_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..843c94fa41c79d16e7745a1f297a49f219388fda
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward_cpu_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+
+namespace cpu {
+
+TORCH_API at::Tensor _upsample_nearest_exact1d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor _upsample_nearest_exact1d_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales, at::Tensor & grad_input);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales, at::Tensor & grad_input);
+
+} // namespace cpu
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..55a4b85b02dc70ed208cbf4e5226792315e261ea
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward_cuda_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor _upsample_nearest_exact1d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor _upsample_nearest_exact1d_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales, at::Tensor & grad_input);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales, at::Tensor & grad_input);
+
+} // namespace cuda
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward_meta.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward_meta.h
new file mode 100644
index 0000000000000000000000000000000000000000..b911f80f0f4803f55f4e2dc16da71d4aa6aa7e02
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward_meta.h
@@ -0,0 +1,27 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeMetaFunction.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace at {
+namespace meta {
+
+struct TORCH_API structured__upsample_nearest_exact1d_backward : public at::impl::MetaBase {
+
+
+  void meta(const at::Tensor & grad_output, at::ArrayRef<int64_t> output_size, at::ArrayRef<int64_t> input_size, ::std::optional<double> scales);
+};
+
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward_meta_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward_meta_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..823674c5a182e7f734126b79c151cc3d9c0e00fe
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward_meta_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+
+namespace meta {
+
+TORCH_API at::Tensor _upsample_nearest_exact1d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor _upsample_nearest_exact1d_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales, at::Tensor & grad_input);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales, at::Tensor & grad_input);
+
+} // namespace meta
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..22064adee3c8d62e550555bd881519c3ea1495eb
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward_native.h
@@ -0,0 +1,26 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace at {
+namespace native {
+struct TORCH_API structured__upsample_nearest_exact1d_backward_out_cpu : public at::meta::structured__upsample_nearest_exact1d_backward {
+void impl(const at::Tensor & grad_output, at::ArrayRef<int64_t> output_size, at::ArrayRef<int64_t> input_size, ::std::optional<double> scales, const at::Tensor & grad_input);
+};
+struct TORCH_API structured__upsample_nearest_exact1d_backward_out_cuda : public at::meta::structured__upsample_nearest_exact1d_backward {
+void impl(const at::Tensor & grad_output, at::ArrayRef<int64_t> output_size, at::ArrayRef<int64_t> input_size, ::std::optional<double> scales, const at::Tensor & grad_input);
+};
+} // namespace native
+} // namespace at
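// [editor's note] The two structs above are the backend halves of the structured kernel
// declared in _upsample_nearest_exact1d_backward_meta.h: both inherit the shared meta()
// shape check, and each supplies the impl() that actually fills grad_input on its device.
// The glue that instantiates them and registers the resulting kernels lives in generated
// translation units (RegisterCPU.cpp / RegisterCUDA.cpp in the same build), which is why no
// function definitions appear in any of these headers.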
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..8acd70009a8812028df74181161c522ad72bf5da
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_backward_ops.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _upsample_nearest_exact1d_backward_grad_input {
+  using schema = at::Tensor & (const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, ::std::optional<double>, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_upsample_nearest_exact1d_backward";
+  static constexpr const char* overload_name = "grad_input";
+  static constexpr const char* schema_str = "_upsample_nearest_exact1d_backward.grad_input(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!)";
+  static at::Tensor & call(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales, at::Tensor & grad_input);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales, at::Tensor & grad_input);
+};
+
+struct TORCH_API _upsample_nearest_exact1d_backward {
+  using schema = at::Tensor (const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, ::std::optional<double>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_upsample_nearest_exact1d_backward";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_upsample_nearest_exact1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, float? scales=None) -> Tensor";
+  static at::Tensor call(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_compositeexplicitautogradnonfunctional_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_compositeexplicitautogradnonfunctional_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..a1d416c21b6208681bd2491bd27e06c204591270
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_compositeexplicitautogradnonfunctional_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+
+namespace compositeexplicitautogradnonfunctional {
+
+TORCH_API at::Tensor _upsample_nearest_exact1d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor _upsample_nearest_exact1d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales=::std::nullopt);
+
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_compositeimplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..ce4cfbfc5e13c502a387b506410c80b9d81fa600
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_compositeimplicitautograd_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+
+namespace compositeimplicitautograd {
+
+TORCH_API at::Tensor _upsample_nearest_exact1d(const at::Tensor & input, at::OptionalIntArrayRef output_size, ::std::optional<at::ArrayRef<double>> scale_factors);
+TORCH_API at::Tensor _upsample_nearest_exact1d_symint(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, ::std::optional<at::ArrayRef<double>> scale_factors);
+
+} // namespace compositeimplicitautograd
+} // namespace at
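// [editor's note] Taken together, the *_dispatch.h headers expose one identical surface per
// dispatch key: at::cpu:: and at::cuda:: jump straight to a backend kernel, at::meta:: runs
// only the shape function (this is how meta/"fake" tensors get their sizes), and the two
// composite namespaces cover keys implemented in terms of other ops. Direct per-key calls
// are possible but bypass dispatching; ordinary code should prefer the at:: wrappers.
// Sketch, assuming a CPU build (sizes hypothetical):
//
//   at::Tensor x = at::rand({2, 4, 8});
//   at::Tensor y = at::cpu::_upsample_nearest_exact1d(x, {16});  // pinned to the CPU kernel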
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_cpu_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..85f5d91c6ede80d3d3765bf83e51537309d2cfbb
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_cpu_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+
+namespace cpu {
+
+TORCH_API at::Tensor _upsample_nearest_exact1d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor _upsample_nearest_exact1d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_outf(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales, at::Tensor & out);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales, at::Tensor & out);
+
+} // namespace cpu
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..71b90af2c6223db48ea11da83542536a3fda676f
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_cuda_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor _upsample_nearest_exact1d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor _upsample_nearest_exact1d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_outf(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales, at::Tensor & out);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales, at::Tensor & out);
+
+} // namespace cuda
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_meta.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_meta.h
new file mode 100644
index 0000000000000000000000000000000000000000..09eaa8ffe3b6a845359a61ead2007516f57e0b84
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_meta.h
@@ -0,0 +1,27 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeMetaFunction.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace at {
+namespace meta {
+
+struct TORCH_API structured__upsample_nearest_exact1d : public at::impl::MetaBase {
+
+
+  void meta(const at::Tensor & self, at::ArrayRef<int64_t> output_size, ::std::optional<double> scales);
+};
+
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_meta_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_meta_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..e536ccdd0dd9f37d93ce17c6ba6f6bbd7bdc8f44
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_meta_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+
+namespace meta {
+
+TORCH_API at::Tensor _upsample_nearest_exact1d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor _upsample_nearest_exact1d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_outf(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales, at::Tensor & out);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact1d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales, at::Tensor & out);
+
+} // namespace meta
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..18bb508521ac9b044b79fce66579eaa424b00298
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_native.h
@@ -0,0 +1,27 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor _upsample_nearest_exact1d(const at::Tensor & input, at::OptionalIntArrayRef output_size, ::std::optional<at::ArrayRef<double>> scale_factors);
+struct TORCH_API structured__upsample_nearest_exact1d_out_cpu : public at::meta::structured__upsample_nearest_exact1d {
+void impl(const at::Tensor & self, at::ArrayRef<int64_t> output_size, ::std::optional<double> scales, const at::Tensor & out);
+};
+struct TORCH_API structured__upsample_nearest_exact1d_out_cuda : public at::meta::structured__upsample_nearest_exact1d {
+void impl(const at::Tensor & self, at::ArrayRef<int64_t> output_size, ::std::optional<double> scales, const at::Tensor & out);
+};
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..7a026d8a1a0f991dbadf06e6827208747bbea6ba
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact1d_ops.h
@@ -0,0 +1,51 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include
+#include
+#include
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _upsample_nearest_exact1d_vec {
+  using schema = at::Tensor (const at::Tensor &, at::OptionalSymIntArrayRef, ::std::optional<at::ArrayRef<double>>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_upsample_nearest_exact1d";
+  static constexpr const char* overload_name = "vec";
+  static constexpr const char* schema_str = "_upsample_nearest_exact1d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor";
+  static at::Tensor call(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, ::std::optional<at::ArrayRef<double>> scale_factors);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, at::OptionalSymIntArrayRef output_size, ::std::optional<at::ArrayRef<double>> scale_factors);
+};
+
+struct TORCH_API _upsample_nearest_exact1d_out {
+  using schema = at::Tensor & (const at::Tensor &, c10::SymIntArrayRef, ::std::optional<double>, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_upsample_nearest_exact1d";
+  static constexpr const char* overload_name = "out";
+  static constexpr const char* schema_str = "_upsample_nearest_exact1d.out(Tensor self, SymInt[1] output_size, float? scales=None, *, Tensor(a!) out) -> Tensor(a!)";
+  static at::Tensor & call(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales, at::Tensor & out);
+};
+
+struct TORCH_API _upsample_nearest_exact1d {
+  using schema = at::Tensor (const at::Tensor &, c10::SymIntArrayRef, ::std::optional<double>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_upsample_nearest_exact1d";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_upsample_nearest_exact1d(Tensor self, SymInt[1] output_size, float? scales=None) -> Tensor";
+  static at::Tensor call(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales);
+};
+
+}} // namespace at::_ops
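// [editor's note] These _ops structs are the dispatcher-facing identity of the operator:
// name plus overload_name ("", "out", "vec") select the registered overload, schema_str is
// parsed into the FunctionSchema the dispatcher type-checks against, and call()/redispatch()
// are what every wrapper in the headers above ultimately funnels into. The same overloads
// are reachable generically, e.g. from Python (sizes hypothetical):
//
//   # torch.ops.aten._upsample_nearest_exact1d.vec(x, [16], None)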
::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt, scale_factors); +} +namespace symint { + template >> + at::Tensor _upsample_nearest_exact2d(const at::Tensor & input, at::OptionalIntArrayRef output_size, ::std::optional> scale_factors) { + return at::_ops::_upsample_nearest_exact2d_vec::call(input, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt, scale_factors); + } +} + +// aten::_upsample_nearest_exact2d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor +inline at::Tensor _upsample_nearest_exact2d_symint(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, ::std::optional> scale_factors) { + return at::_ops::_upsample_nearest_exact2d_vec::call(input, output_size, scale_factors); +} +namespace symint { + template >> + at::Tensor _upsample_nearest_exact2d(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, ::std::optional> scale_factors) { + return at::_ops::_upsample_nearest_exact2d_vec::call(input, output_size, scale_factors); + } +} + +// aten::_upsample_nearest_exact2d.out(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _upsample_nearest_exact2d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, c10::fromIntArrayRefSlow(output_size), scales_h, scales_w, out); +} +namespace symint { + template >> + at::Tensor & _upsample_nearest_exact2d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, c10::fromIntArrayRefSlow(output_size), scales_h, scales_w, out); + } +} + +// aten::_upsample_nearest_exact2d.out(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _upsample_nearest_exact2d_outf(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, c10::fromIntArrayRefSlow(output_size), scales_h, scales_w, out); +} +namespace symint { + template >> + at::Tensor & _upsample_nearest_exact2d_outf(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, c10::fromIntArrayRefSlow(output_size), scales_h, scales_w, out); + } +} + +// aten::_upsample_nearest_exact2d.out(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & _upsample_nearest_exact2d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, output_size, scales_h, scales_w, out); +} +namespace symint { + template >> + at::Tensor & _upsample_nearest_exact2d_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, output_size, scales_h, scales_w, out); + } +} + +// aten::_upsample_nearest_exact2d.out(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _upsample_nearest_exact2d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, output_size, scales_h, scales_w, out); +} +namespace symint { + template >> + at::Tensor & _upsample_nearest_exact2d_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, output_size, scales_h, scales_w, out); + } +} + +// aten::_upsample_nearest_exact2d(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor +inline at::Tensor _upsample_nearest_exact2d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d::call(self, c10::fromIntArrayRefSlow(output_size), scales_h, scales_w); +} +namespace symint { + template >> + at::Tensor _upsample_nearest_exact2d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d::call(self, c10::fromIntArrayRefSlow(output_size), scales_h, scales_w); + } +} + +// aten::_upsample_nearest_exact2d(Tensor self, SymInt[2] output_size, float? scales_h=None, float? 
scales_w=None) -> Tensor +inline at::Tensor _upsample_nearest_exact2d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d::call(self, output_size, scales_h, scales_w); +} +namespace symint { + template >> + at::Tensor _upsample_nearest_exact2d(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d::call(self, output_size, scales_h, scales_w); + } +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..2c0a44947301b326c309cd5cea6fcf3e71e40d03 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward.h @@ -0,0 +1,92 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_upsample_nearest_exact2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!) +inline at::Tensor & _upsample_nearest_exact2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_h, scales_w, grad_input); +} +namespace symint { + template >> + at::Tensor & _upsample_nearest_exact2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_h, scales_w, grad_input); + } +} + +// aten::_upsample_nearest_exact2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!) 
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward.h
new file mode 100644
index 0000000000000000000000000000000000000000..2c0a44947301b326c309cd5cea6fcf3e71e40d03
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward.h
@@ -0,0 +1,92 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <optional>
+
+
+
+#include <ATen/ops/_upsample_nearest_exact2d_backward_ops.h>
+
+namespace at {
+
+
+// aten::_upsample_nearest_exact2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact2d_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_h, scales_w, grad_input);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor & _upsample_nearest_exact2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact2d_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_h, scales_w, grad_input);
+  }
+}
+
+// aten::_upsample_nearest_exact2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact2d_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input) {
+    return at::_ops::_upsample_nearest_exact2d_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_h, scales_w, grad_input);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor & _upsample_nearest_exact2d_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input) {
+    return at::_ops::_upsample_nearest_exact2d_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_h, scales_w, grad_input);
+  }
+}
+
+// aten::_upsample_nearest_exact2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact2d_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact2d_backward_grad_input::call(grad_output, output_size, input_size, scales_h, scales_w, grad_input);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor & _upsample_nearest_exact2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact2d_backward_grad_input::call(grad_output, output_size, input_size, scales_h, scales_w, grad_input);
+  }
+}
+
+// aten::_upsample_nearest_exact2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact2d_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input) {
+    return at::_ops::_upsample_nearest_exact2d_backward_grad_input::call(grad_output, output_size, input_size, scales_h, scales_w, grad_input);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor & _upsample_nearest_exact2d_backward_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input) {
+    return at::_ops::_upsample_nearest_exact2d_backward_grad_input::call(grad_output, output_size, input_size, scales_h, scales_w, grad_input);
+  }
+}
+
+// aten::_upsample_nearest_exact2d_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor
+inline at::Tensor _upsample_nearest_exact2d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact2d_backward::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_h, scales_w);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor _upsample_nearest_exact2d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact2d_backward::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_h, scales_w);
+  }
+}
+
+// aten::_upsample_nearest_exact2d_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor
+inline at::Tensor _upsample_nearest_exact2d_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact2d_backward::call(grad_output, output_size, input_size, scales_h, scales_w);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor _upsample_nearest_exact2d_backward(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact2d_backward::call(grad_output, output_size, input_size, scales_h, scales_w);
+  }
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_compositeexplicitautogradnonfunctional_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_compositeexplicitautogradnonfunctional_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..4415916eb83cb083172a216dfd9e8239990a9a26
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_compositeexplicitautogradnonfunctional_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/Tensor.h>
+
+namespace at {
+
+namespace compositeexplicitautogradnonfunctional {
+
+TORCH_API at::Tensor _upsample_nearest_exact2d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor _upsample_nearest_exact2d_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_cpu_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..76cb9a191125333af150d02821097fd4dcb251eb
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_cpu_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/Tensor.h>
+
+namespace at {
+
+namespace cpu {
+
+TORCH_API at::Tensor _upsample_nearest_exact2d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor _upsample_nearest_exact2d_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input);
+
+} // namespace cpu
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..714a63e7211fa346be841d9146a1db7ffaebb812
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_cuda_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/Tensor.h>
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor _upsample_nearest_exact2d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor _upsample_nearest_exact2d_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input);
+
+} // namespace cuda
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_meta.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_meta.h
new file mode 100644
index 0000000000000000000000000000000000000000..de623084cf41af7913e2f12f1e79275519d5b2e7
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_meta.h
@@ -0,0 +1,27 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeMetaFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <optional>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/TensorIterator.h>
+#include <ATen/TensorMeta.h>
+#include <tuple>
+#include <vector>
+
+namespace at {
+namespace meta {
+
+struct TORCH_API structured__upsample_nearest_exact2d_backward : public at::impl::MetaBase {
+    
+    
+    void meta(const at::Tensor & grad_output, at::ArrayRef<int64_t> output_size, at::ArrayRef<int64_t> input_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w);
+};
+
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_meta_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_meta_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..5c32a4e53b5710bec7bafb1bba5c7447992346fb
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_meta_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/Tensor.h>
+
+namespace at {
+
+namespace meta {
+
+TORCH_API at::Tensor _upsample_nearest_exact2d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor _upsample_nearest_exact2d_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input);
+
+} // namespace meta
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..4ec78f6779d9a89143d9bb9d03b3893b6c8568e5
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_native.h
@@ -0,0 +1,26 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <optional>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+#include <ATen/ops/_upsample_nearest_exact2d_backward_meta.h>
+
+namespace at {
+namespace native {
+struct TORCH_API structured__upsample_nearest_exact2d_backward_out_cpu : public at::meta::structured__upsample_nearest_exact2d_backward {
+void impl(const at::Tensor & grad_output, at::ArrayRef<int64_t> output_size, at::ArrayRef<int64_t> input_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, const at::Tensor & grad_input);
+};
+struct TORCH_API structured__upsample_nearest_exact2d_backward_out_cuda : public at::meta::structured__upsample_nearest_exact2d_backward {
+void impl(const at::Tensor & grad_output, at::ArrayRef<int64_t> output_size, at::ArrayRef<int64_t> input_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, const at::Tensor & grad_input);
+};
+} // namespace native
+} // namespace at
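These structured declarations split the kernel in two: the meta() function from the *_meta.h header checks arguments and determines the output shape, while the backend structs' impl() fills in grad_input. From user code, the preallocated out-variant declared in the Function.h header above would be driven roughly like this sketch (shapes are illustrative assumptions):

    // Gradient w.r.t. a 16x16 upsampled output, propagated back to the 8x8 input.
    at::Tensor grad_output = at::rand({1, 3, 16, 16});
    at::Tensor grad_input = at::empty({1, 3, 8, 8});
    at::_upsample_nearest_exact2d_backward_out(grad_input, grad_output,
                                               /*output_size=*/{16, 16},
                                               /*input_size=*/{1, 3, 8, 8});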
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..d2258e168e73ae19e5b5dc8f5c1d21338f14c452
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_backward_ops.h
@@ -0,0 +1,40 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <c10/core/QScheme.h>
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _upsample_nearest_exact2d_backward_grad_input {
+  using schema = at::Tensor & (const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, ::std::optional<double>, ::std::optional<double>, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_upsample_nearest_exact2d_backward";
+  static constexpr const char* overload_name = "grad_input";
+  static constexpr const char* schema_str = "_upsample_nearest_exact2d_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)";
+  static at::Tensor & call(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input);
+};
+
+struct TORCH_API _upsample_nearest_exact2d_backward {
+  using schema = at::Tensor (const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, ::std::optional<double>, ::std::optional<double>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_upsample_nearest_exact2d_backward";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_upsample_nearest_exact2d_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor";
+  static at::Tensor call(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w);
+};
+
+}} // namespace at::_ops
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_compositeexplicitautogradnonfunctional_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_compositeexplicitautogradnonfunctional_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..11c14edbe3f7858206258175210d3527343d138a
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_compositeexplicitautogradnonfunctional_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/Tensor.h>
+
+namespace at {
+
+namespace compositeexplicitautogradnonfunctional {
+
+TORCH_API at::Tensor _upsample_nearest_exact2d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor _upsample_nearest_exact2d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_compositeimplicitautograd_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..9044b057ad333417d221144226330af9c15ad93b
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_compositeimplicitautograd_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/Tensor.h>
+
+namespace at {
+
+namespace compositeimplicitautograd {
+
+TORCH_API at::Tensor _upsample_nearest_exact2d(const at::Tensor & input, at::OptionalIntArrayRef output_size, ::std::optional<at::ArrayRef<double>> scale_factors);
+TORCH_API at::Tensor _upsample_nearest_exact2d_symint(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, ::std::optional<at::ArrayRef<double>> scale_factors);
+
+} // namespace compositeimplicitautograd
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_cpu_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..303a297b3d8480dd70f1acbcba08e41a941f4121
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_cpu_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/Tensor.h>
+
+namespace at {
+
+namespace cpu {
+
+TORCH_API at::Tensor _upsample_nearest_exact2d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor _upsample_nearest_exact2d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_outf(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out);
+
+} // namespace cpu
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..a54546d32a89d8c279580a5395d18e19fbe45c02
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_cuda_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/Tensor.h>
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor _upsample_nearest_exact2d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor _upsample_nearest_exact2d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_outf(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out);
+
+} // namespace cuda
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_meta.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_meta.h
new file mode 100644
index 0000000000000000000000000000000000000000..ca91b233e0d2d7004e540b60345a42a7f4983d91
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_meta.h
@@ -0,0 +1,27 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeMetaFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <optional>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/TensorIterator.h>
+#include <ATen/TensorMeta.h>
+#include <tuple>
+#include <vector>
+
+namespace at {
+namespace meta {
+
+struct TORCH_API structured__upsample_nearest_exact2d : public at::impl::MetaBase {
+    
+    
+    void meta(const at::Tensor & self, at::ArrayRef<int64_t> output_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w);
+};
+
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_meta_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_meta_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..a7c69f7fd0510a1e0069ff6624c168b44c0b8ef2
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_meta_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/Tensor.h>
+
+namespace at {
+
+namespace meta {
+
+TORCH_API at::Tensor _upsample_nearest_exact2d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor _upsample_nearest_exact2d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_outf(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact2d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out);
+
+} // namespace meta
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..e4a7c2d38c291ffd4af9c69e2ce0c760b4a2cc56
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_native.h
@@ -0,0 +1,28 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <optional>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+#include <ATen/ops/_upsample_nearest_exact2d_meta.h>
+
+namespace at {
+namespace native {
+TORCH_API at::Tensor _upsample_nearest_exact2d(const at::Tensor & input, at::OptionalIntArrayRef output_size, ::std::optional<at::ArrayRef<double>> scale_factors);
+struct TORCH_API structured__upsample_nearest_exact2d_out_cpu : public at::meta::structured__upsample_nearest_exact2d {
+void impl(const at::Tensor & self, at::ArrayRef<int64_t> output_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, const at::Tensor & out);
+};
+struct TORCH_API structured__upsample_nearest_exact2d_out_cuda : public at::meta::structured__upsample_nearest_exact2d {
+void impl(const at::Tensor & self, at::ArrayRef<int64_t> output_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, const at::Tensor & out);
+};
+TORCH_API at::Tensor _upsample_nearest_exact2d_quantized_cpu(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..2c36fc5f625d77a0dd79ec93da90a58143a3f404
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d_ops.h
@@ -0,0 +1,51 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Operator.h
+
+#include <c10/core/QScheme.h>
+#include <tuple>
+#include <vector>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/ATen_fwd.h>
+
+namespace at {
+namespace _ops {
+
+
+struct TORCH_API _upsample_nearest_exact2d_vec {
+  using schema = at::Tensor (const at::Tensor &, at::OptionalSymIntArrayRef, ::std::optional<at::ArrayRef<double>>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_upsample_nearest_exact2d";
+  static constexpr const char* overload_name = "vec";
+  static constexpr const char* schema_str = "_upsample_nearest_exact2d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor";
+  static at::Tensor call(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, ::std::optional<at::ArrayRef<double>> scale_factors);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, at::OptionalSymIntArrayRef output_size, ::std::optional<at::ArrayRef<double>> scale_factors);
+};
+
+struct TORCH_API _upsample_nearest_exact2d_out {
+  using schema = at::Tensor & (const at::Tensor &, c10::SymIntArrayRef, ::std::optional<double>, ::std::optional<double>, at::Tensor &);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_upsample_nearest_exact2d";
+  static constexpr const char* overload_name = "out";
+  static constexpr const char* schema_str = "_upsample_nearest_exact2d.out(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)";
+  static at::Tensor & call(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out);
+  static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out);
+};
+
+struct TORCH_API _upsample_nearest_exact2d {
+  using schema = at::Tensor (const at::Tensor &, c10::SymIntArrayRef, ::std::optional<double>, ::std::optional<double>);
+  using ptr_schema = schema*;
+  // See Note [static constexpr char* members for windows NVCC]
+  static constexpr const char* name = "aten::_upsample_nearest_exact2d";
+  static constexpr const char* overload_name = "";
+  static constexpr const char* schema_str = "_upsample_nearest_exact2d(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor";
+  static at::Tensor call(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w);
+  static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales_h, ::std::optional<double> scales_w);
+};
+
+}} // namespace at::_ops
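Each at::_ops struct above is a typed handle to one registered overload: name, overload_name, and schema_str identify it in the dispatcher, call() enters dispatch from the top, and redispatch() continues below a given DispatchKeySet. The inline wrappers earlier in this patch reduce to calls like the following sketch (values are illustrative assumptions):

    at::Tensor self = at::rand({1, 3, 8, 8});
    int64_t sizes[] = {16, 16};
    // Equivalent to at::_upsample_nearest_exact2d(self, {16, 16});
    // fromIntArrayRefSlow converts the concrete sizes to SymInts.
    at::Tensor out = at::_ops::_upsample_nearest_exact2d::call(
        self, c10::fromIntArrayRefSlow(sizes), ::std::nullopt, ::std::nullopt);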
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d.h
new file mode 100644
index 0000000000000000000000000000000000000000..57a228eb3a7844050c10c2e92ea2750e625262ad
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d.h
@@ -0,0 +1,114 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <optional>
+
+
+
+#include <ATen/ops/_upsample_nearest_exact3d_ops.h>
+
+namespace at {
+
+
+// aten::_upsample_nearest_exact3d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor
+inline at::Tensor _upsample_nearest_exact3d(const at::Tensor & input, at::OptionalIntArrayRef output_size, ::std::optional<at::ArrayRef<double>> scale_factors) {
+    return at::_ops::_upsample_nearest_exact3d_vec::call(input, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt, scale_factors);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor _upsample_nearest_exact3d(const at::Tensor & input, at::OptionalIntArrayRef output_size, ::std::optional<at::ArrayRef<double>> scale_factors) {
+    return at::_ops::_upsample_nearest_exact3d_vec::call(input, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt, scale_factors);
+  }
+}
+
+// aten::_upsample_nearest_exact3d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor
+inline at::Tensor _upsample_nearest_exact3d_symint(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, ::std::optional<at::ArrayRef<double>> scale_factors) {
+    return at::_ops::_upsample_nearest_exact3d_vec::call(input, output_size, scale_factors);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor _upsample_nearest_exact3d(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, ::std::optional<at::ArrayRef<double>> scale_factors) {
+    return at::_ops::_upsample_nearest_exact3d_vec::call(input, output_size, scale_factors);
+  }
+}
+
+// aten::_upsample_nearest_exact3d.out(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact3d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact3d_out::call(self, c10::fromIntArrayRefSlow(output_size), scales_d, scales_h, scales_w, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor & _upsample_nearest_exact3d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact3d_out::call(self, c10::fromIntArrayRefSlow(output_size), scales_d, scales_h, scales_w, out);
+  }
+}
+
+// aten::_upsample_nearest_exact3d.out(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact3d_outf(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales_d, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out) {
+    return at::_ops::_upsample_nearest_exact3d_out::call(self, c10::fromIntArrayRefSlow(output_size), scales_d, scales_h, scales_w, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor & _upsample_nearest_exact3d_outf(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales_d, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out) {
+    return at::_ops::_upsample_nearest_exact3d_out::call(self, c10::fromIntArrayRefSlow(output_size), scales_d, scales_h, scales_w, out);
+  }
+}
+
+// aten::_upsample_nearest_exact3d.out(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact3d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact3d_out::call(self, output_size, scales_d, scales_h, scales_w, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor & _upsample_nearest_exact3d_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact3d_out::call(self, output_size, scales_d, scales_h, scales_w, out);
+  }
+}
+
+// aten::_upsample_nearest_exact3d.out(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact3d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales_d, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out) {
+    return at::_ops::_upsample_nearest_exact3d_out::call(self, output_size, scales_d, scales_h, scales_w, out);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor & _upsample_nearest_exact3d_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales_d, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & out) {
+    return at::_ops::_upsample_nearest_exact3d_out::call(self, output_size, scales_d, scales_h, scales_w, out);
+  }
+}
+
+// aten::_upsample_nearest_exact3d(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
+inline at::Tensor _upsample_nearest_exact3d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact3d::call(self, c10::fromIntArrayRefSlow(output_size), scales_d, scales_h, scales_w);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor _upsample_nearest_exact3d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact3d::call(self, c10::fromIntArrayRefSlow(output_size), scales_d, scales_h, scales_w);
  }
+}
+
+// aten::_upsample_nearest_exact3d(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
+inline at::Tensor _upsample_nearest_exact3d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact3d::call(self, output_size, scales_d, scales_h, scales_w);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor _upsample_nearest_exact3d(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact3d::call(self, output_size, scales_d, scales_h, scales_w);
+  }
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward.h
new file mode 100644
index 0000000000000000000000000000000000000000..f7a31620f1ace995dda1e31af3fc8fc1c06ba197
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward.h
@@ -0,0 +1,92 @@
+#pragma once
+
+// @generated by torchgen/gen.py from Function.h
+
+#include <ATen/Context.h>
+#include <ATen/DeviceGuard.h>
+#include <ATen/TensorUtils.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/Generator.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <c10/util/Optional.h>
+#include <optional>
+
+
+
+#include <ATen/ops/_upsample_nearest_exact3d_backward_ops.h>
+
+namespace at {
+
+
+// aten::_upsample_nearest_exact3d_backward.grad_input(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact3d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact3d_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_d, scales_h, scales_w, grad_input);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor & _upsample_nearest_exact3d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact3d_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_d, scales_h, scales_w, grad_input);
+  }
+}
+
+// aten::_upsample_nearest_exact3d_backward.grad_input(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact3d_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_d, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input) {
+    return at::_ops::_upsample_nearest_exact3d_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_d, scales_h, scales_w, grad_input);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor & _upsample_nearest_exact3d_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_d, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input) {
+    return at::_ops::_upsample_nearest_exact3d_backward_grad_input::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_d, scales_h, scales_w, grad_input);
+  }
+}
+
+// aten::_upsample_nearest_exact3d_backward.grad_input(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact3d_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact3d_backward_grad_input::call(grad_output, output_size, input_size, scales_d, scales_h, scales_w, grad_input);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor & _upsample_nearest_exact3d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact3d_backward_grad_input::call(grad_output, output_size, input_size, scales_d, scales_h, scales_w, grad_input);
+  }
+}
+
+// aten::_upsample_nearest_exact3d_backward.grad_input(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+inline at::Tensor & _upsample_nearest_exact3d_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_d, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input) {
+    return at::_ops::_upsample_nearest_exact3d_backward_grad_input::call(grad_output, output_size, input_size, scales_d, scales_h, scales_w, grad_input);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor & _upsample_nearest_exact3d_backward_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_d, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input) {
+    return at::_ops::_upsample_nearest_exact3d_backward_grad_input::call(grad_output, output_size, input_size, scales_d, scales_h, scales_w, grad_input);
+  }
+}
+
+// aten::_upsample_nearest_exact3d_backward(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
+inline at::Tensor _upsample_nearest_exact3d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact3d_backward::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_d, scales_h, scales_w);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, int64_t>>>
+  at::Tensor _upsample_nearest_exact3d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact3d_backward::call(grad_output, c10::fromIntArrayRefSlow(output_size), c10::fromIntArrayRefSlow(input_size), scales_d, scales_h, scales_w);
+  }
+}
+
+// aten::_upsample_nearest_exact3d_backward(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
+inline at::Tensor _upsample_nearest_exact3d_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact3d_backward::call(grad_output, output_size, input_size, scales_d, scales_h, scales_w);
+}
+namespace symint {
+  template <typename T, typename = std::enable_if_t<std::is_same_v<T, c10::SymInt>>>
+  at::Tensor _upsample_nearest_exact3d_backward(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt) {
+    return at::_ops::_upsample_nearest_exact3d_backward::call(grad_output, output_size, input_size, scales_d, scales_h, scales_w);
  }
+}
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_compositeexplicitautogradnonfunctional_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_compositeexplicitautogradnonfunctional_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..236526cc020bb6bc9b45bd1a4ace77a1991d6427
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_compositeexplicitautogradnonfunctional_dispatch.h
@@ -0,0 +1,24 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/Tensor.h>
+
+namespace at {
+
+namespace compositeexplicitautogradnonfunctional {
+
+TORCH_API at::Tensor _upsample_nearest_exact3d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor _upsample_nearest_exact3d_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+
+} // namespace compositeexplicitautogradnonfunctional
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_cpu_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..e21ab05868fb8b4288b35e570fc500e1d4fbba33
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_cpu_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/Tensor.h>
+
+namespace at {
+
+namespace cpu {
+
+TORCH_API at::Tensor _upsample_nearest_exact3d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor _upsample_nearest_exact3d_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact3d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact3d_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_d, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input);
+TORCH_API at::Tensor & _upsample_nearest_exact3d_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact3d_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_d, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input);
+
+} // namespace cpu
+} // namespace at
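The per-backend namespaces declared by these *_dispatch.h headers (at::cpu, at::cuda, at::meta, ...) bind directly to the kernel registered for that dispatch key, skipping full dispatch. A hedged sketch of backend-specific use, with illustrative NCDHW shapes that are assumptions rather than part of this patch:

    // Gradient of a 4x8x8 -> 8x16x16 exact-nearest 3d upsample, on CPU.
    at::Tensor grad_output = at::rand({1, 2, 8, 16, 16});
    at::Tensor grad_input = at::cpu::_upsample_nearest_exact3d_backward(
        grad_output, /*output_size=*/{8, 16, 16}, /*input_size=*/{1, 2, 4, 8, 8});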
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_cuda_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..5ffcef201037e07f6bb113eff004698f99dd1567
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_cuda_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/Tensor.h>
+
+namespace at {
+
+namespace cuda {
+
+TORCH_API at::Tensor _upsample_nearest_exact3d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor _upsample_nearest_exact3d_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact3d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact3d_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_d, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input);
+TORCH_API at::Tensor & _upsample_nearest_exact3d_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact3d_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_d, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input);
+
+} // namespace cuda
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_meta.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_meta.h
new file mode 100644
index 0000000000000000000000000000000000000000..9f00934393cefddee4dba4806e32cd19d2b8d653
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_meta.h
@@ -0,0 +1,27 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeMetaFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <optional>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/TensorIterator.h>
+#include <ATen/TensorMeta.h>
+#include <tuple>
+#include <vector>
+
+namespace at {
+namespace meta {
+
+struct TORCH_API structured__upsample_nearest_exact3d_backward : public at::impl::MetaBase {
+    
+    
+    void meta(const at::Tensor & grad_output, at::ArrayRef<int64_t> output_size, at::ArrayRef<int64_t> input_size, ::std::optional<double> scales_d, ::std::optional<double> scales_h, ::std::optional<double> scales_w);
+};
+
+} // namespace native
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_meta_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_meta_dispatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..385a46c829263dead80ff1cca92e88db17c886d1
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_meta_dispatch.h
@@ -0,0 +1,28 @@
+#pragma once
+// @generated by torchgen/gen.py from DispatchKeyFunction.h
+
+// NB: The implementing C++ file is RegisterDispatchKey.cpp
+
+// The only #includes we need are for custom classes that have defaults in the C++ API
+#include <c10/core/MemoryFormat.h>
+#include <c10/core/Scalar.h>
+#include <ATen/core/Reduction.h>
+
+// Forward declarations of any types needed in the operator signatures.
+// We can't directly include these classes because it will cause circular include dependencies.
+// This file is included by TensorBody.h, which defines the Tensor class.
+#include <ATen/core/Tensor.h>
+
+namespace at {
+
+namespace meta {
+
+TORCH_API at::Tensor _upsample_nearest_exact3d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor _upsample_nearest_exact3d_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact3d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact3d_backward_outf(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional<double> scales_d, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input);
+TORCH_API at::Tensor & _upsample_nearest_exact3d_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_d=::std::nullopt, ::std::optional<double> scales_h=::std::nullopt, ::std::optional<double> scales_w=::std::nullopt);
+TORCH_API at::Tensor & _upsample_nearest_exact3d_backward_symint_outf(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional<double> scales_d, ::std::optional<double> scales_h, ::std::optional<double> scales_w, at::Tensor & grad_input);
+
+} // namespace meta
+} // namespace at
diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_native.h
new file mode 100644
index 0000000000000000000000000000000000000000..194b0213de075fd1198ef60273e7ab864e388123
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_native.h
@@ -0,0 +1,26 @@
+#pragma once
+
+// @generated by torchgen/gen.py from NativeFunction.h
+
+#include <c10/core/Scalar.h>
+#include <c10/core/Storage.h>
+#include <c10/core/TensorOptions.h>
+#include <c10/util/Deprecated.h>
+#include <optional>
+#include <c10/core/QScheme.h>
+#include <ATen/core/Reduction.h>
+#include <ATen/core/Tensor.h>
+#include <tuple>
+#include <vector>
+#include + +namespace at { +namespace native { +struct TORCH_API structured__upsample_nearest_exact3d_backward_out_cpu : public at::meta::structured__upsample_nearest_exact3d_backward { +void impl(const at::Tensor & grad_output, at::ArrayRef output_size, at::ArrayRef input_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w, const at::Tensor & grad_input); +}; +struct TORCH_API structured__upsample_nearest_exact3d_backward_out_cuda : public at::meta::structured__upsample_nearest_exact3d_backward { +void impl(const at::Tensor & grad_output, at::ArrayRef output_size, at::ArrayRef input_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w, const at::Tensor & grad_input); +}; +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..d6743eeefff9633301449fe5ea1c83027df45058 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_backward_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _upsample_nearest_exact3d_backward_grad_input { + using schema = at::Tensor & (const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, ::std::optional, ::std::optional, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_upsample_nearest_exact3d_backward"; + static constexpr const char* overload_name = "grad_input"; + static constexpr const char* schema_str = "_upsample_nearest_exact3d_backward.grad_input(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & grad_input); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & grad_input); +}; + +struct TORCH_API _upsample_nearest_exact3d_backward { + using schema = at::Tensor (const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, ::std::optional, ::std::optional, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_upsample_nearest_exact3d_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_upsample_nearest_exact3d_backward(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? 
scales_w=None) -> Tensor"; + static at::Tensor call(const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, c10::SymIntArrayRef output_size, c10::SymIntArrayRef input_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_compositeexplicitautogradnonfunctional_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8767e630f2b6c0d460c8270bcb9640335f2f7811 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor _upsample_nearest_exact3d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_d=::std::nullopt, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor _upsample_nearest_exact3d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_d=::std::nullopt, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..823edef65132e41f59674b932564e6aad59be34c --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_compositeimplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _upsample_nearest_exact3d(const at::Tensor & input, at::OptionalIntArrayRef output_size, ::std::optional> scale_factors); +TORCH_API at::Tensor _upsample_nearest_exact3d_symint(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, ::std::optional> scale_factors); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..7b5ff539b87d5148225990085750dc2f0f67955d --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_cpu_dispatch.h @@ -0,0 +1,28 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _upsample_nearest_exact3d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_d=::std::nullopt, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor _upsample_nearest_exact3d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_d=::std::nullopt, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_nearest_exact3d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_d=::std::nullopt, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_nearest_exact3d_outf(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out); +TORCH_API at::Tensor & _upsample_nearest_exact3d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_d=::std::nullopt, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_nearest_exact3d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ad297ce0d57fe6d04a8768622ed1515bfeb42b76 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_cuda_dispatch.h @@ -0,0 +1,28 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// 
Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _upsample_nearest_exact3d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_d=::std::nullopt, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor _upsample_nearest_exact3d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_d=::std::nullopt, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_nearest_exact3d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_d=::std::nullopt, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_nearest_exact3d_outf(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out); +TORCH_API at::Tensor & _upsample_nearest_exact3d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_d=::std::nullopt, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_nearest_exact3d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_meta.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..e2a0b6c47c2c46b1592455cb482ccab9822eb879 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_meta.h @@ -0,0 +1,27 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured__upsample_nearest_exact3d : public at::impl::MetaBase { + + + void meta(const at::Tensor & self, at::ArrayRef output_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w); +}; + +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_meta_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..30b4744c5ceb52f611b7f25b6fe34e10b0a31837 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_meta_dispatch.h @@ -0,0 +1,28 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. 
+// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor _upsample_nearest_exact3d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_d=::std::nullopt, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor _upsample_nearest_exact3d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_d=::std::nullopt, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_nearest_exact3d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_d=::std::nullopt, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_nearest_exact3d_outf(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out); +TORCH_API at::Tensor & _upsample_nearest_exact3d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_d=::std::nullopt, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +TORCH_API at::Tensor & _upsample_nearest_exact3d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out); + +} // namespace meta +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_native.h new file mode 100644 index 0000000000000000000000000000000000000000..d23b74c1008d3d8ce0cf59a965f9bdc5a27a1246 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_native.h @@ -0,0 +1,28 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +TORCH_API at::Tensor _upsample_nearest_exact3d(const at::Tensor & input, at::OptionalIntArrayRef output_size, ::std::optional> scale_factors); +struct TORCH_API structured__upsample_nearest_exact3d_out_cpu : public at::meta::structured__upsample_nearest_exact3d { +void impl(const at::Tensor & self, at::ArrayRef output_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w, const at::Tensor & out); +}; +struct TORCH_API structured__upsample_nearest_exact3d_out_cuda : public at::meta::structured__upsample_nearest_exact3d { +void impl(const at::Tensor & self, at::ArrayRef output_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w, const at::Tensor & out); +}; +TORCH_API at::Tensor _upsample_nearest_exact3d_quantized_cpu(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_d=::std::nullopt, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..dbfbae5e64efedc22d992c186c1ff666c0cf8aa0 --- /dev/null +++ 
b/phivenv/Lib/site-packages/torch/include/ATen/ops/_upsample_nearest_exact3d_ops.h @@ -0,0 +1,51 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _upsample_nearest_exact3d_vec { + using schema = at::Tensor (const at::Tensor &, at::OptionalSymIntArrayRef, ::std::optional>); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_upsample_nearest_exact3d"; + static constexpr const char* overload_name = "vec"; + static constexpr const char* schema_str = "_upsample_nearest_exact3d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor"; + static at::Tensor call(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, ::std::optional> scale_factors); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, at::OptionalSymIntArrayRef output_size, ::std::optional> scale_factors); +}; + +struct TORCH_API _upsample_nearest_exact3d_out { + using schema = at::Tensor & (const at::Tensor &, c10::SymIntArrayRef, ::std::optional, ::std::optional, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_upsample_nearest_exact3d"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_upsample_nearest_exact3d.out(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out); +}; + +struct TORCH_API _upsample_nearest_exact3d { + using schema = at::Tensor (const at::Tensor &, c10::SymIntArrayRef, ::std::optional, ::std::optional, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_upsample_nearest_exact3d"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_upsample_nearest_exact3d(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? 
scales_w=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_d, ::std::optional scales_h, ::std::optional scales_w); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_ctc_loss.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_ctc_loss.h new file mode 100644 index 0000000000000000000000000000000000000000..fc190df6b2ae360b39cb22318df989eeb56f5804 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_ctc_loss.h @@ -0,0 +1,36 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_use_cudnn_ctc_loss(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank) -> bool +inline bool _use_cudnn_ctc_loss(const at::Tensor & log_probs, const at::Tensor & targets, at::IntArrayRef input_lengths, at::IntArrayRef target_lengths, int64_t blank) { + return at::_ops::_use_cudnn_ctc_loss::call(log_probs, targets, input_lengths, target_lengths, blank); +} + +// aten::_use_cudnn_ctc_loss.Tensor(Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, int blank) -> bool +inline bool _use_cudnn_ctc_loss(const at::Tensor & log_probs, const at::Tensor & targets, const at::Tensor & input_lengths, const at::Tensor & target_lengths, int64_t blank) { + return at::_ops::_use_cudnn_ctc_loss_Tensor::call(log_probs, targets, input_lengths, target_lengths, blank); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_ctc_loss_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_ctc_loss_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..114a52e52629bdc8be7bcbe6b63a56c0ee8b003b --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_ctc_loss_cuda_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cuda { + +TORCH_API bool _use_cudnn_ctc_loss(const at::Tensor & log_probs, const at::Tensor & targets, at::IntArrayRef input_lengths, at::IntArrayRef target_lengths, int64_t blank); +TORCH_API bool _use_cudnn_ctc_loss(const at::Tensor & log_probs, const at::Tensor & targets, const at::Tensor & input_lengths, const at::Tensor & target_lengths, int64_t blank); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_ctc_loss_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_ctc_loss_native.h new file mode 100644 index 0000000000000000000000000000000000000000..692af48c4d24379fb04641713b60b362568e906f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_ctc_loss_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API bool _use_cudnn_ctc_loss(const at::Tensor & log_probs, const at::Tensor & targets, at::IntArrayRef input_lengths, at::IntArrayRef target_lengths, int64_t blank); +TORCH_API bool _use_cudnn_ctc_loss_tensor(const at::Tensor & log_probs, const at::Tensor & targets, const at::Tensor & input_lengths, const at::Tensor & target_lengths, int64_t blank); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_ctc_loss_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_ctc_loss_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..ee2c75adf6cdc656430cf785165dc6279b79068e --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_ctc_loss_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _use_cudnn_ctc_loss { + using schema = bool (const at::Tensor &, const at::Tensor &, at::IntArrayRef, at::IntArrayRef, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_use_cudnn_ctc_loss"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_use_cudnn_ctc_loss(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank) -> bool"; + static bool call(const at::Tensor & log_probs, const at::Tensor & targets, at::IntArrayRef input_lengths, at::IntArrayRef target_lengths, int64_t blank); + static bool redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & log_probs, const at::Tensor & targets, at::IntArrayRef input_lengths, at::IntArrayRef target_lengths, int64_t blank); +}; + +struct TORCH_API _use_cudnn_ctc_loss_Tensor { + using schema = bool (const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_use_cudnn_ctc_loss"; + static constexpr const char* overload_name = "Tensor"; + static constexpr const char* schema_str = "_use_cudnn_ctc_loss.Tensor(Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, int blank) -> bool"; + static bool call(const at::Tensor & log_probs, const at::Tensor & targets, const at::Tensor & input_lengths, const at::Tensor & target_lengths, int64_t blank); + static bool redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & log_probs, const at::Tensor & targets, const at::Tensor & input_lengths, const at::Tensor & target_lengths, int64_t blank); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_rnn_flatten_weight.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_rnn_flatten_weight.h new file mode 100644 index 0000000000000000000000000000000000000000..80b274211b919c9c3cc6b84c380548f614769b6a --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_rnn_flatten_weight.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_use_cudnn_rnn_flatten_weight() -> bool +inline bool _use_cudnn_rnn_flatten_weight() { + return at::_ops::_use_cudnn_rnn_flatten_weight::call(); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_rnn_flatten_weight_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_rnn_flatten_weight_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8ab7850b6a6cf69d24221195b1b9f1a74d740caf --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_rnn_flatten_weight_compositeimplicitautograd_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API bool _use_cudnn_rnn_flatten_weight(); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_rnn_flatten_weight_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_rnn_flatten_weight_native.h new file mode 100644 index 0000000000000000000000000000000000000000..eede1e89396d4dedc75cd4ac6609e200429b327a --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_rnn_flatten_weight_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API bool _use_cudnn_rnn_flatten_weight(); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_rnn_flatten_weight_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_rnn_flatten_weight_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..fc98a936951cb707b557405f77e12ad3d9c0b75e --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_use_cudnn_rnn_flatten_weight_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _use_cudnn_rnn_flatten_weight { + using schema = bool (); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_use_cudnn_rnn_flatten_weight"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_use_cudnn_rnn_flatten_weight() -> bool"; + static bool call(); + static bool redispatch(c10::DispatchKeySet dispatchKeySet); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_compressed_sparse_indices.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_compressed_sparse_indices.h new file mode 100644 index 0000000000000000000000000000000000000000..b7ff1eef2d23c39434a8d6b951ba2fd81d82969d --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_compressed_sparse_indices.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_validate_compressed_sparse_indices(bool is_crow, Tensor compressed_idx, Tensor plain_idx, int cdim, int dim, int nnz) -> () +inline void _validate_compressed_sparse_indices(bool is_crow, const at::Tensor & compressed_idx, const at::Tensor & plain_idx, int64_t cdim, int64_t dim, int64_t nnz) { + return at::_ops::_validate_compressed_sparse_indices::call(is_crow, compressed_idx, plain_idx, cdim, dim, nnz); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_compressed_sparse_indices_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_compressed_sparse_indices_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..c3b066cc74f2082cbedc27b43298ba73ae6f708d --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_compressed_sparse_indices_cpu_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cpu { + +TORCH_API void _validate_compressed_sparse_indices(bool is_crow, const at::Tensor & compressed_idx, const at::Tensor & plain_idx, int64_t cdim, int64_t dim, int64_t nnz); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_compressed_sparse_indices_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_compressed_sparse_indices_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d7fa1248f2ddd1de5d12abf8aad7565f16e8e773 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_compressed_sparse_indices_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API void _validate_compressed_sparse_indices(bool is_crow, const at::Tensor & compressed_idx, const at::Tensor & plain_idx, int64_t cdim, int64_t dim, int64_t nnz); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_compressed_sparse_indices_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_compressed_sparse_indices_native.h new file mode 100644 index 0000000000000000000000000000000000000000..ce9d24ff0088c1b53adb8784b0e84c20b290854f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_compressed_sparse_indices_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API void _validate_compressed_sparse_indices_cpu(bool is_crow, const at::Tensor & compressed_idx, const at::Tensor & plain_idx, int64_t cdim, int64_t dim, int64_t nnz); +TORCH_API void _validate_compressed_sparse_indices_cuda(bool is_crow, const at::Tensor & compressed_idx, const at::Tensor & plain_idx, int64_t cdim, int64_t dim, int64_t nnz); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_compressed_sparse_indices_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_compressed_sparse_indices_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..95b6399e5419133843467fbd889b5a9578ad8ed7 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_compressed_sparse_indices_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _validate_compressed_sparse_indices { + using schema = void (bool, const at::Tensor &, const at::Tensor &, int64_t, int64_t, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_validate_compressed_sparse_indices"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_validate_compressed_sparse_indices(bool is_crow, Tensor compressed_idx, Tensor plain_idx, int cdim, int dim, int nnz) -> ()"; + static void call(bool is_crow, const at::Tensor & compressed_idx, const at::Tensor & plain_idx, int64_t cdim, int64_t dim, int64_t nnz); + static void redispatch(c10::DispatchKeySet dispatchKeySet, bool is_crow, const at::Tensor & compressed_idx, const at::Tensor & plain_idx, int64_t cdim, int64_t dim, int64_t nnz); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsc_tensor_args.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsc_tensor_args.h new file mode 100644 index 0000000000000000000000000000000000000000..ec5b56837d139f10bb642a3e4ffd37f9333b98bc --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsc_tensor_args.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_validate_sparse_bsc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, bool? check_pinning=None) -> () +inline void _validate_sparse_bsc_tensor_args(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional check_pinning=::std::nullopt) { + return at::_ops::_validate_sparse_bsc_tensor_args::call(ccol_indices, row_indices, values, size, check_pinning); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsc_tensor_args_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsc_tensor_args_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..12deeaf2c6ddb42eb02a019bdd45ce446afa8146 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsc_tensor_args_compositeimplicitautograd_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API void _validate_sparse_bsc_tensor_args(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional check_pinning=::std::nullopt); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsc_tensor_args_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsc_tensor_args_native.h new file mode 100644 index 0000000000000000000000000000000000000000..4db35b6d5bc18e6b98dcd3b34e5e6f3048bd40ab --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsc_tensor_args_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API void _validate_sparse_bsc_tensor_args(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional check_pinning=::std::nullopt); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsc_tensor_args_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsc_tensor_args_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..3d52b601d154519d6149764fccda3d22e61d6f9f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsc_tensor_args_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _validate_sparse_bsc_tensor_args { + using schema = void (const at::Tensor &, const at::Tensor &, const at::Tensor &, at::IntArrayRef, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_validate_sparse_bsc_tensor_args"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_validate_sparse_bsc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, bool? 
check_pinning=None) -> ()"; + static void call(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional check_pinning); + static void redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional check_pinning); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsr_tensor_args.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsr_tensor_args.h new file mode 100644 index 0000000000000000000000000000000000000000..0110cf7e0eb4d1ae7f326cfaff3c18e43a38ab99 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsr_tensor_args.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_validate_sparse_bsr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, bool? check_pinning=None) -> () +inline void _validate_sparse_bsr_tensor_args(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional check_pinning=::std::nullopt) { + return at::_ops::_validate_sparse_bsr_tensor_args::call(crow_indices, col_indices, values, size, check_pinning); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsr_tensor_args_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsr_tensor_args_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..60d460305f58d3e8db390f9b601c0898435bbc51 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsr_tensor_args_compositeimplicitautograd_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API void _validate_sparse_bsr_tensor_args(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional check_pinning=::std::nullopt); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsr_tensor_args_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsr_tensor_args_native.h new file mode 100644 index 0000000000000000000000000000000000000000..bbda244976da86ff3a6bcd8301b7f4139d0971b5 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsr_tensor_args_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API void _validate_sparse_bsr_tensor_args(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional check_pinning=::std::nullopt); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsr_tensor_args_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsr_tensor_args_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..6d2329a270cdf0eab5fd94d96b6f492d59805f66 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_bsr_tensor_args_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _validate_sparse_bsr_tensor_args { + using schema = void (const at::Tensor &, const at::Tensor &, const at::Tensor &, at::IntArrayRef, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_validate_sparse_bsr_tensor_args"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_validate_sparse_bsr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, bool? 
check_pinning=None) -> ()"; + static void call(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional check_pinning); + static void redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional check_pinning); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_compressed_tensor_args.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_compressed_tensor_args.h new file mode 100644 index 0000000000000000000000000000000000000000..c9a97e2f2d52e86e25e66c49efedbfad810180f7 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_compressed_tensor_args.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_validate_sparse_compressed_tensor_args(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, Layout layout, bool? check_pinning=None) -> () +inline void _validate_sparse_compressed_tensor_args(const at::Tensor & compressed_indices, const at::Tensor & plain_indices, const at::Tensor & values, at::IntArrayRef size, at::Layout layout, ::std::optional check_pinning=::std::nullopt) { + return at::_ops::_validate_sparse_compressed_tensor_args::call(compressed_indices, plain_indices, values, size, layout, check_pinning); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_compressed_tensor_args_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_compressed_tensor_args_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..732bef2dde7361563bb19e9aa728cd61aff0dc27 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_compressed_tensor_args_compositeimplicitautograd_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API void _validate_sparse_compressed_tensor_args(const at::Tensor & compressed_indices, const at::Tensor & plain_indices, const at::Tensor & values, at::IntArrayRef size, at::Layout layout, ::std::optional check_pinning=::std::nullopt); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_compressed_tensor_args_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_compressed_tensor_args_native.h new file mode 100644 index 0000000000000000000000000000000000000000..e257c8a30801cbe2b6e0f9b2d85e453d40ef35c3 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_compressed_tensor_args_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API void _validate_sparse_compressed_tensor_args(const at::Tensor & compressed_indices, const at::Tensor & plain_indices, const at::Tensor & values, at::IntArrayRef size, at::Layout layout, ::std::optional check_pinning=::std::nullopt); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_compressed_tensor_args_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_compressed_tensor_args_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..d5dd726c184cedd755e0310f901d3a4380976b04 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_compressed_tensor_args_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _validate_sparse_compressed_tensor_args { + using schema = void (const at::Tensor &, const at::Tensor &, const at::Tensor &, at::IntArrayRef, at::Layout, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_validate_sparse_compressed_tensor_args"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_validate_sparse_compressed_tensor_args(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, Layout layout, bool? 
check_pinning=None) -> ()"; + static void call(const at::Tensor & compressed_indices, const at::Tensor & plain_indices, const at::Tensor & values, at::IntArrayRef size, at::Layout layout, ::std::optional check_pinning); + static void redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & compressed_indices, const at::Tensor & plain_indices, const at::Tensor & values, at::IntArrayRef size, at::Layout layout, ::std::optional check_pinning); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_coo_tensor_args.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_coo_tensor_args.h new file mode 100644 index 0000000000000000000000000000000000000000..74299058a670ff932f27adc012ea829ea4d8375c --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_coo_tensor_args.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_validate_sparse_coo_tensor_args(Tensor indices, Tensor values, int[] size, bool? is_coalesced=None, bool? check_pinning=None) -> () +inline void _validate_sparse_coo_tensor_args(const at::Tensor & indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional is_coalesced=::std::nullopt, ::std::optional check_pinning=::std::nullopt) { + return at::_ops::_validate_sparse_coo_tensor_args::call(indices, values, size, is_coalesced, check_pinning); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_coo_tensor_args_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_coo_tensor_args_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..07c08c8a5c767526d13dfd68813050a7a8217d72 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_coo_tensor_args_compositeimplicitautograd_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API void _validate_sparse_coo_tensor_args(const at::Tensor & indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional is_coalesced=::std::nullopt, ::std::optional check_pinning=::std::nullopt); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_coo_tensor_args_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_coo_tensor_args_native.h new file mode 100644 index 0000000000000000000000000000000000000000..b3c03fe584b6a79a2beba640ef894eb21045ba30 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_coo_tensor_args_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API void _validate_sparse_coo_tensor_args(const at::Tensor & indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional is_coalesced=::std::nullopt, ::std::optional check_pinning=::std::nullopt); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_coo_tensor_args_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_coo_tensor_args_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..f26c764007c29840ea4c007da87a7b66339ef230 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_coo_tensor_args_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _validate_sparse_coo_tensor_args { + using schema = void (const at::Tensor &, const at::Tensor &, at::IntArrayRef, ::std::optional, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_validate_sparse_coo_tensor_args"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_validate_sparse_coo_tensor_args(Tensor indices, Tensor values, int[] size, bool? is_coalesced=None, bool? 
check_pinning=None) -> ()"; + static void call(const at::Tensor & indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional is_coalesced, ::std::optional check_pinning); + static void redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional is_coalesced, ::std::optional check_pinning); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csc_tensor_args.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csc_tensor_args.h new file mode 100644 index 0000000000000000000000000000000000000000..15e1914f9f195f92590c7b74542e359073b8c718 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csc_tensor_args.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_validate_sparse_csc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, bool? check_pinning=None) -> () +inline void _validate_sparse_csc_tensor_args(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional check_pinning=::std::nullopt) { + return at::_ops::_validate_sparse_csc_tensor_args::call(ccol_indices, row_indices, values, size, check_pinning); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csc_tensor_args_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csc_tensor_args_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..52abe38c1036243d1aa48489b719dce2c83b671d --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csc_tensor_args_compositeimplicitautograd_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API void _validate_sparse_csc_tensor_args(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional check_pinning=::std::nullopt); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csc_tensor_args_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csc_tensor_args_native.h new file mode 100644 index 0000000000000000000000000000000000000000..dcf26c43e084a86ca9db83abd3e789af4a988fb1 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csc_tensor_args_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API void _validate_sparse_csc_tensor_args(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional check_pinning=::std::nullopt); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csc_tensor_args_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csc_tensor_args_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..49c6989cc026ea25f5ce2e1c638241f03c865a82 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csc_tensor_args_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _validate_sparse_csc_tensor_args { + using schema = void (const at::Tensor &, const at::Tensor &, const at::Tensor &, at::IntArrayRef, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_validate_sparse_csc_tensor_args"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_validate_sparse_csc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, bool? 
check_pinning=None) -> ()"; + static void call(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional check_pinning); + static void redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional check_pinning); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csr_tensor_args.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csr_tensor_args.h new file mode 100644 index 0000000000000000000000000000000000000000..0b1a6c04d5dc1340086e2992e8354f862f3f5052 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csr_tensor_args.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_validate_sparse_csr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, bool? check_pinning=None) -> () +inline void _validate_sparse_csr_tensor_args(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional check_pinning=::std::nullopt) { + return at::_ops::_validate_sparse_csr_tensor_args::call(crow_indices, col_indices, values, size, check_pinning); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csr_tensor_args_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csr_tensor_args_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..2a80ccec20647a6340806a21ceb4236b37ad618a --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csr_tensor_args_compositeimplicitautograd_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API void _validate_sparse_csr_tensor_args(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional check_pinning=::std::nullopt); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csr_tensor_args_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csr_tensor_args_native.h new file mode 100644 index 0000000000000000000000000000000000000000..55b7694774b2521d8f1d11cc952b86bb1921ab84 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csr_tensor_args_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API void _validate_sparse_csr_tensor_args(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional check_pinning=::std::nullopt); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csr_tensor_args_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csr_tensor_args_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..4e575aed18ea9745af0722116c4cf7907313980f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_validate_sparse_csr_tensor_args_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _validate_sparse_csr_tensor_args { + using schema = void (const at::Tensor &, const at::Tensor &, const at::Tensor &, at::IntArrayRef, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_validate_sparse_csr_tensor_args"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_validate_sparse_csr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, bool? 
check_pinning=None) -> ()"; + static void call(const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional check_pinning); + static void redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & crow_indices, const at::Tensor & col_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional check_pinning); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_values.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_values.h new file mode 100644 index 0000000000000000000000000000000000000000..6f709541b37408e8d1cf83e4b1ddc08bc1fe9eb2 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_values.h @@ -0,0 +1,27 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_values_copy.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_values_copy.h new file mode 100644 index 0000000000000000000000000000000000000000..ad04006986c9624cfd0e881219579b995d835913 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_values_copy.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_values_copy(Tensor self) -> Tensor +inline at::Tensor _values_copy(const at::Tensor & self) { + return at::_ops::_values_copy::call(self); +} + +// aten::_values_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _values_copy_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::_values_copy_out::call(self, out); +} +// aten::_values_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _values_copy_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::_values_copy_out::call(self, out); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_values_copy_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_values_copy_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..17bd7a5b7389261d28ec9e5fcd780dd1c77087f5 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_values_copy_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
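For reference, a small usage sketch of the _values_copy wrappers declared above (illustrative, not part of the patch). Unlike the aliasing _values op, whose schema is Tensor(a) -> Tensor(a), the _copy variant returns an independent tensor, so mutating the result leaves the sparse tensor untouched.

#include <ATen/ATen.h>

int main() {
  // One nonzero entry at (0, 0) of a 3x3 sparse COO tensor.
  at::Tensor indices = at::zeros({2, 1}, at::kLong);
  at::Tensor values = at::ones({1});
  at::Tensor sp = at::sparse_coo_tensor(indices, values, {3, 3});

  // Independent copy of the value buffer; in-place edits do not reach sp.
  at::Tensor v_copy = at::_values_copy(sp);
  v_copy.add_(1.0);

  // Out variant declared alongside it, writing into a preallocated tensor.
  at::Tensor out = at::empty_like(v_copy);
  at::_values_copy_out(out, sp);
  return 0;
}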
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _values_copy_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & _values_copy_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_values_copy_compositeexplicitautogradnonfunctional_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_values_copy_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..429a70277b7e17f1e0c7d5700afb831758647f75 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_values_copy_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor _values_copy(const at::Tensor & self); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_values_copy_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_values_copy_native.h new file mode 100644 index 0000000000000000000000000000000000000000..e206a697929f2042f03bf1057401e85d9cd1902a --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_values_copy_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _values_copy_out(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor _values_copy(const at::Tensor & self); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_values_copy_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_values_copy_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..8b45ec45bd5eb5111617a25236861c3deb77602c --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_values_copy_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _values_copy { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_values_copy"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_values_copy(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API _values_copy_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_values_copy"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_values_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_values_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_values_native.h new file mode 100644 index 0000000000000000000000000000000000000000..dfa8057a44321584af599b984ae65a65d4d09535 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_values_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _values_sparse(const at::Tensor & self); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_values_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_values_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..835d2adc382e263b54f062689759eea5eeafebaa --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_values_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _values { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_values"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_values(Tensor(a) self) -> Tensor(a)"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_version.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_version.h new file mode 100644 index 0000000000000000000000000000000000000000..10b77c8d6ced6d161575ec361ff82932be5815ea --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_version.h @@ -0,0 +1,27 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_version_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_version_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..2a58b69606765ac4a03a918b210e1451249619f5 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_version_compositeimplicitautograd_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API int64_t _version(const at::Tensor & self); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_version_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_version_native.h new file mode 100644 index 0000000000000000000000000000000000000000..3e0966c5bd07ea2bdadf657e9971268435806a84 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_version_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API int64_t _version(const at::Tensor & self); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_version_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_version_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..01121af0c0286aeff239ea60a85903f25efa49b4 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_version_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _version { + using schema = int64_t (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_version"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_version(Tensor self) -> int"; + static int64_t call(const at::Tensor & self); + static int64_t redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm.h new file mode 100644 index 0000000000000000000000000000000000000000..e2f8f33e271cf9903b4b31ada55d55a2a92aadd5 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor +inline at::Tensor _weight_int4pack_mm(const at::Tensor & self, const at::Tensor & mat2, int64_t qGroupSize, const at::Tensor & qScaleAndZeros) { + return at::_ops::_weight_int4pack_mm::call(self, mat2, qGroupSize, qScaleAndZeros); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b9e6114e874bf4fc727c9603573be4a128641797 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
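A hedged sketch of how the arguments of the int4 packed matmul declared above line up. The shape and dtype comments are my reading of the upstream usage (half/bfloat16 activation, weight pre-packed by the companion _convert_weight_to_int4pack op, scales-and-zeros of shape [K / qGroupSize, N, 2]) and should be checked against the kernels in this tree; the snippet only forwards arguments.

#include <ATen/ATen.h>

// Forwarding helper only; names and shape comments are assumptions, not API
// facts beyond the declaration shown in the hunk above.
at::Tensor int4_linear(const at::Tensor& activation,       // [M, K], half/bfloat16
                       const at::Tensor& packed_weight,    // output of _convert_weight_to_int4pack
                       int64_t group_size,                 // qGroupSize (e.g. 32, 64, 128)
                       const at::Tensor& scales_and_zeros  // [K / group_size, N, 2]
) {
  return at::_weight_int4pack_mm(activation, packed_weight, group_size, scales_and_zeros);
}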
+#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _weight_int4pack_mm(const at::Tensor & self, const at::Tensor & mat2, int64_t qGroupSize, const at::Tensor & qScaleAndZeros); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_for_cpu.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_for_cpu.h new file mode 100644 index 0000000000000000000000000000000000000000..e0d5a978c288aa5783b09a77577eb641df48b124 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_for_cpu.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_weight_int4pack_mm_for_cpu(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor +inline at::Tensor _weight_int4pack_mm_for_cpu(const at::Tensor & self, const at::Tensor & mat2, int64_t qGroupSize, const at::Tensor & qScaleAndZeros) { + return at::_ops::_weight_int4pack_mm_for_cpu::call(self, mat2, qGroupSize, qScaleAndZeros); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_for_cpu_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_for_cpu_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ac393d76c895e39557a0b1318113c79d22f824e4 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_for_cpu_cpu_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _weight_int4pack_mm_for_cpu(const at::Tensor & self, const at::Tensor & mat2, int64_t qGroupSize, const at::Tensor & qScaleAndZeros); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_for_cpu_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_for_cpu_native.h new file mode 100644 index 0000000000000000000000000000000000000000..a0b16d7ee7aece1a75d3dc5fb5a8db17ec80a501 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_for_cpu_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _weight_int4pack_mm_cpu(const at::Tensor & self, const at::Tensor & mat2, int64_t qGroupSize, const at::Tensor & qScaleAndZeros); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_for_cpu_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_for_cpu_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..961fb9638d6bf1aeb9d793c02fb3be9776fc3c52 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_for_cpu_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _weight_int4pack_mm_for_cpu { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, int64_t, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_weight_int4pack_mm_for_cpu"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_weight_int4pack_mm_for_cpu(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & mat2, int64_t qGroupSize, const at::Tensor & qScaleAndZeros); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & mat2, int64_t qGroupSize, const at::Tensor & qScaleAndZeros); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_native.h new file mode 100644 index 0000000000000000000000000000000000000000..6ef0a09fcc5aba5673f941df834a3aea5ef52986 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _weight_int4pack_mm_cuda(const at::Tensor & self, const at::Tensor & mat2, int64_t qGroupSize, const at::Tensor & qScaleAndZeros); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..de6a2ff39961382f08e121a729293d3bbbba4c40 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _weight_int4pack_mm { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, int64_t, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_weight_int4pack_mm"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & mat2, int64_t qGroupSize, const at::Tensor & qScaleAndZeros); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & mat2, int64_t qGroupSize, const at::Tensor & qScaleAndZeros); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_with_scales_and_zeros.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_with_scales_and_zeros.h new file mode 100644 index 0000000000000000000000000000000000000000..8d58f95c9ccfc3250bebf945437c58da5a47f76c --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_with_scales_and_zeros.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_weight_int4pack_mm_with_scales_and_zeros(Tensor self, Tensor mat2, int qGroupSize, Tensor qScale, Tensor qZeros) -> Tensor +inline at::Tensor _weight_int4pack_mm_with_scales_and_zeros(const at::Tensor & self, const at::Tensor & mat2, int64_t qGroupSize, const at::Tensor & qScale, const at::Tensor & qZeros) { + return at::_ops::_weight_int4pack_mm_with_scales_and_zeros::call(self, mat2, qGroupSize, qScale, qZeros); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_with_scales_and_zeros_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_with_scales_and_zeros_native.h new file mode 100644 index 0000000000000000000000000000000000000000..c5a3c76d57877a1eb33d3032a716d5260d5fb518 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_with_scales_and_zeros_native.h @@ -0,0 +1,20 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_with_scales_and_zeros_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_with_scales_and_zeros_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..5bb7d14c3a2bd0384f42599232425a66c9b16054 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_with_scales_and_zeros_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _weight_int4pack_mm_with_scales_and_zeros { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, int64_t, const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_weight_int4pack_mm_with_scales_and_zeros"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_weight_int4pack_mm_with_scales_and_zeros(Tensor self, Tensor mat2, int qGroupSize, Tensor qScale, Tensor qZeros) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & mat2, int64_t qGroupSize, const at::Tensor & qScale, const at::Tensor & qZeros); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & mat2, int64_t qGroupSize, const at::Tensor & qScale, const at::Tensor & qZeros); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int8pack_mm.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int8pack_mm.h new file mode 100644 index 0000000000000000000000000000000000000000..56f304ae0c91912ad27f2f6b9c4acd2fcf2a436d --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int8pack_mm.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor +inline at::Tensor _weight_int8pack_mm(const at::Tensor & self, const at::Tensor & mat2, const at::Tensor & scales) { + return at::_ops::_weight_int8pack_mm::call(self, mat2, scales); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int8pack_mm_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int8pack_mm_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..feab37cdcc05c4d5d9cc02a9ce9456afd32c7fd3 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int8pack_mm_cpu_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
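A similar, hedged sketch for the int8 path declared above: per-output-row symmetric scales are built for a float weight, the weight is cast to int8, and the op is called so the result can be compared against a plain matmul. The constraints assumed here (2-D float activation, int8 weight of shape [N, K], scales of length N) should be verified against the CPU kernel in this tree.

#include <ATen/ATen.h>
#include <iostream>
#include <tuple>

int main() {
  at::Tensor A = at::randn({2, 8});   // [M, K] float activation
  at::Tensor W = at::randn({4, 8});   // [N, K] float weight to quantize

  // Per-output-row symmetric scales: scale[n] = max|W[n, :]| / 127.
  at::Tensor scales = std::get<0>(W.abs().max(/*dim=*/1)) / 127.0;
  at::Tensor W_int8 = at::round(W / scales.unsqueeze(1)).to(at::kChar);

  // out[m, n] should approximate (A @ W.t())[m, n] after rescaling by scales[n].
  at::Tensor out = at::_weight_int8pack_mm(A, W_int8, scales);
  std::cout << (out - at::matmul(A, W.t())).abs().max() << "\n";
  return 0;
}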
+#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _weight_int8pack_mm(const at::Tensor & self, const at::Tensor & mat2, const at::Tensor & scales); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int8pack_mm_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int8pack_mm_native.h new file mode 100644 index 0000000000000000000000000000000000000000..be08fffabf2bea9a50ab2bbc9701e326929d2b4a --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int8pack_mm_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _weight_int8pack_mm_cpu(const at::Tensor & self, const at::Tensor & mat2, const at::Tensor & scales); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int8pack_mm_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int8pack_mm_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..c623ef7b29f91e5d7c7321dca1de1f4aff040d89 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_int8pack_mm_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _weight_int8pack_mm { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_weight_int8pack_mm"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & mat2, const at::Tensor & scales); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & mat2, const at::Tensor & scales); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm.h new file mode 100644 index 0000000000000000000000000000000000000000..a43cacfecf2205bb8b5675fc07d028eb83739090 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_weight_norm(Tensor v, Tensor g, int dim=0) -> Tensor +inline at::Tensor _weight_norm(const at::Tensor & v, const at::Tensor & g, int64_t dim=0) { + return at::_ops::_weight_norm::call(v, g, dim); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_compositeimplicitautograd_dispatch.h new file mode 100644 index 
0000000000000000000000000000000000000000..bcfac4f4cca662731b3a7a39ad3c611a5d04247a --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_compositeimplicitautograd_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _weight_norm(const at::Tensor & v, const at::Tensor & g, int64_t dim=0); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_differentiable_backward.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_differentiable_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..384d10027e003c0edfe05ba9eee00a2c35075074 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_differentiable_backward.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_weight_norm_differentiable_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor) +inline ::std::tuple _weight_norm_differentiable_backward(const at::Tensor & grad_w, const at::Tensor & saved_v, const at::Tensor & saved_g, const at::Tensor & saved_norms, int64_t dim) { + return at::_ops::_weight_norm_differentiable_backward::call(grad_w, saved_v, saved_g, saved_norms, dim); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_differentiable_backward_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_differentiable_backward_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..407420637ee15d18bccc8a0fe8c2244327eb7d5f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_differentiable_backward_compositeimplicitautograd_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
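An illustrative sketch of the reparameterization computed by the _weight_norm wrapper above: w = g * v / ||v||, with the norm taken over every dimension except dim. With g fixed at one, each slice along dim should come out unit-norm.

#include <ATen/ATen.h>
#include <iostream>

int main() {
  at::Tensor v = at::randn({4, 3});
  at::Tensor g = at::ones({4, 1});   // one gain per slice along dim 0

  // w = g * v / ||v||, norm taken over all dimensions except dim.
  at::Tensor w = at::_weight_norm(v, g, /*dim=*/0);

  // With g == 1 every row of w should have (approximately) unit L2 norm.
  std::cout << (w * w).sum({1}).sqrt() << "\n";
  return 0;
}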
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API ::std::tuple _weight_norm_differentiable_backward(const at::Tensor & grad_w, const at::Tensor & saved_v, const at::Tensor & saved_g, const at::Tensor & saved_norms, int64_t dim); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_differentiable_backward_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_differentiable_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..4f49efa6cb27398ebd05da173831084b77e233af --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_differentiable_backward_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _weight_norm_differentiable_backward(const at::Tensor & grad_w, const at::Tensor & saved_v, const at::Tensor & saved_g, const at::Tensor & saved_norms, int64_t dim); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_differentiable_backward_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_differentiable_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..117f0cbf9334608f6683223b37a97930d061ea02 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_differentiable_backward_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _weight_norm_differentiable_backward { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_weight_norm_differentiable_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_weight_norm_differentiable_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & grad_w, const at::Tensor & saved_v, const at::Tensor & saved_g, const at::Tensor & saved_norms, int64_t dim); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_w, const at::Tensor & saved_v, const at::Tensor & saved_g, const at::Tensor & saved_norms, int64_t dim); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface.h new file mode 100644 index 0000000000000000000000000000000000000000..8b154e091aca07f712a10a04c33e1ff12e8ed7b8 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_weight_norm_interface(Tensor v, Tensor g, int dim=0) -> (Tensor, Tensor) +inline ::std::tuple _weight_norm_interface(const at::Tensor & v, const at::Tensor & g, int64_t dim=0) { + return at::_ops::_weight_norm_interface::call(v, g, dim); +} + +// aten::_weight_norm_interface.out(Tensor v, Tensor g, int dim=0, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!)) +inline ::std::tuple _weight_norm_interface_out(at::Tensor & out0, at::Tensor & out1, const at::Tensor & v, const at::Tensor & g, int64_t dim=0) { + return at::_ops::_weight_norm_interface_out::call(v, g, dim, out0, out1); +} +// aten::_weight_norm_interface.out(Tensor v, Tensor g, int dim=0, *, Tensor(a!) out0, Tensor(b!) 
out1) -> (Tensor(a!), Tensor(b!)) +inline ::std::tuple _weight_norm_interface_outf(const at::Tensor & v, const at::Tensor & g, int64_t dim, at::Tensor & out0, at::Tensor & out1) { + return at::_ops::_weight_norm_interface_out::call(v, g, dim, out0, out1); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_backward.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..a8ace619d9a832bc4be89e5fc2c333ccf2216430 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_backward.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_weight_norm_interface_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor) +inline ::std::tuple _weight_norm_interface_backward(const at::Tensor & grad_w, const at::Tensor & saved_v, const at::Tensor & saved_g, const at::Tensor & saved_norms, int64_t dim) { + return at::_ops::_weight_norm_interface_backward::call(grad_w, saved_v, saved_g, saved_norms, dim); +} + +// aten::_weight_norm_interface_backward.out(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!)) +inline ::std::tuple _weight_norm_interface_backward_out(at::Tensor & out0, at::Tensor & out1, const at::Tensor & grad_w, const at::Tensor & saved_v, const at::Tensor & saved_g, const at::Tensor & saved_norms, int64_t dim) { + return at::_ops::_weight_norm_interface_backward_out::call(grad_w, saved_v, saved_g, saved_norms, dim, out0, out1); +} +// aten::_weight_norm_interface_backward.out(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!)) +inline ::std::tuple _weight_norm_interface_backward_outf(const at::Tensor & grad_w, const at::Tensor & saved_v, const at::Tensor & saved_g, const at::Tensor & saved_norms, int64_t dim, at::Tensor & out0, at::Tensor & out1) { + return at::_ops::_weight_norm_interface_backward_out::call(grad_w, saved_v, saved_g, saved_norms, dim, out0, out1); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_backward_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_backward_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f4ebc8f111481ec7e965af6052365e4f2a3182c2 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_backward_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
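Finally, a sketch tying the fused forward to the backward declared above: _weight_norm_interface returns both the reparameterized weight and the norms, and those norms are exactly what the backward expects as saved_norms. Assumes a CPU libtorch build and dim = 0.

#include <ATen/ATen.h>
#include <iostream>

int main() {
  at::Tensor v = at::randn({4, 3});
  at::Tensor g = at::ones({4, 1});

  // Fused forward: the reparameterized weight plus the norms that the
  // backward op above consumes as `saved_norms`.
  auto [w, norms] = at::_weight_norm_interface(v, g, /*dim=*/0);

  // Feed an all-ones upstream gradient straight into the interface backward.
  at::Tensor grad_w = at::ones_like(w);
  auto [grad_v, grad_g] =
      at::_weight_norm_interface_backward(grad_w, v, g, norms, /*dim=*/0);

  std::cout << grad_v.sizes() << " " << grad_g.sizes() << "\n";
  return 0;
}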
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::tuple _weight_norm_interface_backward_out(at::Tensor & out0, at::Tensor & out1, const at::Tensor & grad_w, const at::Tensor & saved_v, const at::Tensor & saved_g, const at::Tensor & saved_norms, int64_t dim); +TORCH_API ::std::tuple _weight_norm_interface_backward_outf(const at::Tensor & grad_w, const at::Tensor & saved_v, const at::Tensor & saved_g, const at::Tensor & saved_norms, int64_t dim, at::Tensor & out0, at::Tensor & out1); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_backward_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_backward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..7dee31ae655bfc71f1d7fbd310065c6814114aef --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_backward_cpu_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API ::std::tuple _weight_norm_interface_backward(const at::Tensor & grad_w, const at::Tensor & saved_v, const at::Tensor & saved_g, const at::Tensor & saved_norms, int64_t dim); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_backward_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_backward_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..dee6bdf58c2ee741f10ac75e8ffbe7fbcf09f20c --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_backward_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple _weight_norm_interface_backward(const at::Tensor & grad_w, const at::Tensor & saved_v, const at::Tensor & saved_g, const at::Tensor & saved_norms, int64_t dim); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_backward_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..73bae627c3a6ee07cf0a8b057cbf0e1e9f17c354 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_backward_native.h @@ -0,0 +1,23 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _weight_norm_interface_backward_out(const at::Tensor & grad_w, const at::Tensor & saved_v, const at::Tensor & saved_g, const at::Tensor & saved_norms, int64_t dim, at::Tensor & out0, at::Tensor & out1); +TORCH_API ::std::tuple weight_norm_backward_cpu(const at::Tensor & grad_w, const at::Tensor & saved_v, const at::Tensor & saved_g, const at::Tensor & saved_norms, int64_t dim); +TORCH_API ::std::tuple weight_norm_backward_cuda(const at::Tensor & grad_w, const at::Tensor & saved_v, const at::Tensor & saved_g, const at::Tensor & saved_norms, int64_t dim); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_backward_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..555ac6cf49f5e1001f1e5a9ff82b60e77c05e6ec --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_backward_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _weight_norm_interface_backward { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_weight_norm_interface_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_weight_norm_interface_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & grad_w, const at::Tensor & saved_v, const at::Tensor & saved_g, const at::Tensor & saved_norms, int64_t dim); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_w, const at::Tensor & saved_v, const at::Tensor & saved_g, const at::Tensor & saved_norms, int64_t dim); +}; + +struct TORCH_API _weight_norm_interface_backward_out { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, int64_t, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_weight_norm_interface_backward"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_weight_norm_interface_backward.out(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))"; + static ::std::tuple call(const at::Tensor & grad_w, const at::Tensor & saved_v, const at::Tensor & saved_g, const at::Tensor & saved_norms, int64_t dim, at::Tensor & out0, at::Tensor & out1); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_w, const at::Tensor & saved_v, const at::Tensor & saved_g, const at::Tensor & saved_norms, int64_t dim, at::Tensor & out0, at::Tensor & out1); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_compositeexplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..67128a6c6e1365391252f0ebad17b54bf24f7cad --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_compositeexplicitautograd_dispatch.h @@ -0,0 +1,24 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
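Each generated Operator.h struct carries the operator's dispatcher identity: `name` plus `overload_name` is the lookup key, and `schema_str` is the parseable schema. A hedged sketch of using those strings to locate the op at runtime:

#include <ATen/core/dispatch/Dispatcher.h>
#include <iostream>

// Sketch: find the out-variant overload at runtime using exactly the
// name/overload_name strings from the generated struct above.
void lookup_weight_norm_backward_out() {
  c10::OperatorHandle op = c10::Dispatcher::singleton().findSchemaOrThrow(
      "aten::_weight_norm_interface_backward", "out");
  std::cout << op.schema() << "\n";  // prints the registered schema string
}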
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::tuple _weight_norm_interface_out(at::Tensor & out0, at::Tensor & out1, const at::Tensor & v, const at::Tensor & g, int64_t dim=0); +TORCH_API ::std::tuple _weight_norm_interface_outf(const at::Tensor & v, const at::Tensor & g, int64_t dim, at::Tensor & out0, at::Tensor & out1); + +} // namespace compositeexplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_cpu_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..1f06f4e1820dbc72b9c3f43bb0ca5329ee924de4 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_cpu_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API ::std::tuple _weight_norm_interface(const at::Tensor & v, const at::Tensor & g, int64_t dim=0); + +} // namespace cpu +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_cuda_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..dd2bd6863f7b7e64bf77f34fa58411c894ca64c8 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_cuda_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
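Note the two generated out-variant spellings above: `_out` puts the destination tensors first and keeps the `dim=0` default, while `_outf` follows schema order with the outputs trailing and no defaults. A sketch assuming preallocated, correctly shaped outputs:

#include <ATen/ops/_weight_norm_interface_compositeexplicitautograd_dispatch.h>

// Sketch: both calls write into out0/out1 in place; they differ only in
// argument order and in whether `dim` may be defaulted.
void weight_norm_into(const at::Tensor& v, const at::Tensor& g,
                      at::Tensor& out0, at::Tensor& out1) {
  at::compositeexplicitautograd::_weight_norm_interface_out(out0, out1, v, g);
  // Equivalent, with outputs trailing and dim spelled out:
  at::compositeexplicitautograd::_weight_norm_interface_outf(v, g, 0, out0, out1);
}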
+#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple _weight_norm_interface(const at::Tensor & v, const at::Tensor & g, int64_t dim=0); + +} // namespace cuda +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_native.h new file mode 100644 index 0000000000000000000000000000000000000000..c507a9c294898698fb1332d8dd16e8a8c08a0b81 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_native.h @@ -0,0 +1,23 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _weight_norm_interface_out(const at::Tensor & v, const at::Tensor & g, int64_t dim, at::Tensor & out0, at::Tensor & out1); +TORCH_API ::std::tuple weight_norm_cpu(const at::Tensor & v, const at::Tensor & g, int64_t dim=0); +TORCH_API ::std::tuple weight_norm_cuda(const at::Tensor & v, const at::Tensor & g, int64_t dim=0); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..762e020e58b527f5de94cfcd4e08f10b0ac37538 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_interface_ops.h @@ -0,0 +1,40 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _weight_norm_interface { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_weight_norm_interface"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_weight_norm_interface(Tensor v, Tensor g, int dim=0) -> (Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & v, const at::Tensor & g, int64_t dim); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & v, const at::Tensor & g, int64_t dim); +}; + +struct TORCH_API _weight_norm_interface_out { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, int64_t, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_weight_norm_interface"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_weight_norm_interface.out(Tensor v, Tensor g, int dim=0, *, Tensor(a!) out0, Tensor(b!) 
out1) -> (Tensor(a!), Tensor(b!))"; + static ::std::tuple call(const at::Tensor & v, const at::Tensor & g, int64_t dim, at::Tensor & out0, at::Tensor & out1); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & v, const at::Tensor & g, int64_t dim, at::Tensor & out0, at::Tensor & out1); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_native.h new file mode 100644 index 0000000000000000000000000000000000000000..dd6462faa4456c7c324d880cfc20912b79bc9b31 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _weight_norm(const at::Tensor & v, const at::Tensor & g, int64_t dim=0); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..c0ca02bbad002f8d81e97dc5909a09b82856df70 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_weight_norm_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
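`_weight_norm` is the user-facing entry for weight normalization, which reparameterizes a weight as a direction `v` and a magnitude `g`. A hedged reference sketch of the usual decomposition, assuming `at::norm_except_dim` keeps its upstream meaning (2-norm taken over every dimension except `dim`):

#include <ATen/ATen.h>

// Hedged reference: w = v * (g / ||v||), norm over all dims except `dim`.
at::Tensor weight_norm_sketch(const at::Tensor& v, const at::Tensor& g,
                              int64_t dim = 0) {
  return v * (g / at::norm_except_dim(v, /*pow=*/2, dim));
}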
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _weight_norm { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_weight_norm"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_weight_norm(Tensor v, Tensor g, int dim=0) -> Tensor"; + static at::Tensor call(const at::Tensor & v, const at::Tensor & g, int64_t dim); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & v, const at::Tensor & g, int64_t dim); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_linear_prepack.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_linear_prepack.h new file mode 100644 index 0000000000000000000000000000000000000000..6777ce05bb9fada5c7459c69687b970a29e2a44f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_linear_prepack.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_wrapped_linear_prepack(Tensor weight, Tensor weight_scale, Tensor weight_zero_point, Tensor bias) -> Tensor +inline at::Tensor _wrapped_linear_prepack(const at::Tensor & weight, const at::Tensor & weight_scale, const at::Tensor & weight_zero_point, const at::Tensor & bias) { + return at::_ops::_wrapped_linear_prepack::call(weight, weight_scale, weight_zero_point, bias); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_linear_prepack_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_linear_prepack_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..0e6dc92c4f7f8b37df6b0203a58651d8a185efc6 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_linear_prepack_compositeimplicitautograd_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
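The Function.h header above shows the public C++ API pattern: a thin inline wrapper that forwards to the generated `at::_ops` struct. The two spellings below are therefore interchangeable (arguments are placeholders):

#include <ATen/ops/_wrapped_linear_prepack.h>

at::Tensor pack_weight(const at::Tensor& w, const at::Tensor& w_scale,
                       const at::Tensor& w_zero_point, const at::Tensor& bias) {
  // The public wrapper at::_wrapped_linear_prepack(...) compiles down to
  // exactly this forwarding call:
  return at::_ops::_wrapped_linear_prepack::call(w, w_scale, w_zero_point, bias);
}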
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _wrapped_linear_prepack(const at::Tensor & weight, const at::Tensor & weight_scale, const at::Tensor & weight_zero_point, const at::Tensor & bias); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_linear_prepack_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_linear_prepack_native.h new file mode 100644 index 0000000000000000000000000000000000000000..5c0682aa9fa87a0f311837b1c243979ffc679234 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_linear_prepack_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _wrapped_linear_prepack(const at::Tensor & weight, const at::Tensor & weight_scale, const at::Tensor & weight_zero_point, const at::Tensor & bias); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_linear_prepack_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_linear_prepack_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..5b447862d95862d53b99afe000dbc3263f4c9e03 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_linear_prepack_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _wrapped_linear_prepack { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_wrapped_linear_prepack"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_wrapped_linear_prepack(Tensor weight, Tensor weight_scale, Tensor weight_zero_point, Tensor bias) -> Tensor"; + static at::Tensor call(const at::Tensor & weight, const at::Tensor & weight_scale, const at::Tensor & weight_zero_point, const at::Tensor & bias); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & weight, const at::Tensor & weight_scale, const at::Tensor & weight_zero_point, const at::Tensor & bias); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_quantized_linear_prepacked.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_quantized_linear_prepacked.h new file mode 100644 index 0000000000000000000000000000000000000000..dcee5ac24f3322f275e4fe4304865bd1db944f12 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_quantized_linear_prepacked.h @@ -0,0 +1,31 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_wrapped_quantized_linear_prepacked(Tensor input, Tensor input_scale, Tensor input_zero_point, Tensor packed_weight, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor +inline at::Tensor _wrapped_quantized_linear_prepacked(const at::Tensor & input, const at::Tensor & input_scale, const at::Tensor & input_zero_point, const at::Tensor & packed_weight, const at::Tensor & output_scale, const at::Tensor & output_zero_point, int64_t out_channel) { + return at::_ops::_wrapped_quantized_linear_prepacked::call(input, input_scale, input_zero_point, packed_weight, output_scale, output_zero_point, out_channel); +} + +} diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_quantized_linear_prepacked_compositeimplicitautograd_dispatch.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_quantized_linear_prepacked_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8b28940f25a9c7aa269140453a94d1f9151c943b --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_quantized_linear_prepacked_compositeimplicitautograd_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
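The schema comments above suggest the intended pairing of the two wrapped-linear ops: pack the quantized weight once, then run the linear against the packed weight. A hedged sketch; every scale/zero-point tensor and `out_channel` here is an illustrative placeholder, and the exact runtime requirements of these internal ops are not documented in these headers:

#include <ATen/ATen.h>

// Sketch: prepack the quantized weight, then apply the prepacked linear.
at::Tensor wrapped_linear(const at::Tensor& x,
                          const at::Tensor& x_scale, const at::Tensor& x_zp,
                          const at::Tensor& w,
                          const at::Tensor& w_scale, const at::Tensor& w_zp,
                          const at::Tensor& bias,
                          const at::Tensor& out_scale, const at::Tensor& out_zp,
                          int64_t out_channel) {
  at::Tensor packed = at::_wrapped_linear_prepack(w, w_scale, w_zp, bias);
  return at::_wrapped_quantized_linear_prepacked(
      x, x_scale, x_zp, packed, out_scale, out_zp, out_channel);
}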
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _wrapped_quantized_linear_prepacked(const at::Tensor & input, const at::Tensor & input_scale, const at::Tensor & input_zero_point, const at::Tensor & packed_weight, const at::Tensor & output_scale, const at::Tensor & output_zero_point, int64_t out_channel); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_quantized_linear_prepacked_native.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_quantized_linear_prepacked_native.h new file mode 100644 index 0000000000000000000000000000000000000000..b2500554770d3fe1b95052f632414ab4d9be1936 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_quantized_linear_prepacked_native.h @@ -0,0 +1,21 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _wrapped_quantized_linear_prepacked(const at::Tensor & input, const at::Tensor & input_scale, const at::Tensor & input_zero_point, const at::Tensor & packed_weight, const at::Tensor & output_scale, const at::Tensor & output_zero_point, int64_t out_channel); +} // namespace native +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_quantized_linear_prepacked_ops.h b/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_quantized_linear_prepacked_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..c719371bda3866785300f27e87cd8004bae24646 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/ops/_wrapped_quantized_linear_prepacked_ops.h @@ -0,0 +1,29 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
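A `compositeimplicitautograd` dispatch header signals that the op has a single implementation written in terms of other ATen ops, so autograd differentiates through it and no explicit backward kernel is registered. A hypothetical op in that style (not the real kernel):

#include <ATen/ATen.h>

// Hypothetical composite-implicit-style kernel: built only from other ATen
// ops, so autograd "sees through" it and no derivative formula is needed.
at::Tensor scaled_relu(const at::Tensor& x) {
  return at::relu(x) * 2.0;
}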
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _wrapped_quantized_linear_prepacked { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_wrapped_quantized_linear_prepacked"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_wrapped_quantized_linear_prepacked(Tensor input, Tensor input_scale, Tensor input_zero_point, Tensor packed_weight, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor"; + static at::Tensor call(const at::Tensor & input, const at::Tensor & input_scale, const at::Tensor & input_zero_point, const at::Tensor & packed_weight, const at::Tensor & output_scale, const at::Tensor & output_zero_point, int64_t out_channel); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & input_scale, const at::Tensor & input_zero_point, const at::Tensor & packed_weight, const at::Tensor & output_scale, const at::Tensor & output_zero_point, int64_t out_channel); +}; + +}} // namespace at::_ops diff --git a/phivenv/Lib/site-packages/torch/include/ATen/quantized/QTensorImpl.h b/phivenv/Lib/site-packages/torch/include/ATen/quantized/QTensorImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..4b4e941765ecb1af83cc6062ef2a7a6c4480b01e --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/quantized/QTensorImpl.h @@ -0,0 +1,125 @@ +#pragma once + +#include +#include +#include + +namespace at { + +/** + * QTensorImpl is a TensorImpl for Quantized Tensors, it stores Quantizer which + * specifies the quantization scheme and parameters, for more information please + * see ATen/quantized/Quantizer.h + * + * We'll use QTensor in code or documentation to refer to a Tensor with QTensorImpl. + */ +struct TORCH_API QTensorImpl : public c10::TensorImpl { + public: + QTensorImpl( + Storage&& storage, + DispatchKeySet key_set, + const caffe2::TypeMeta data_type, + QuantizerPtr quantizer); + + // See Note [Enum ImplType] + QTensorImpl( + ImplType type, + Storage&& storage, + DispatchKeySet key_set, + const caffe2::TypeMeta data_type, + QuantizerPtr quantizer); + + + // TODO: Expose in PyTorch Frontend + QuantizerPtr quantizer() { + return quantizer_; + } + + void set_quantizer_(QuantizerPtr quantizer) { + quantizer_ = quantizer; + } + + /** + * Return a TensorImpl that is a shallow-copy of this TensorImpl. + * + * For usage of `version_counter` and `allow_tensor_metadata_change`, + * see NOTE [ TensorImpl Shallow-Copying ]. + */ + c10::intrusive_ptr shallow_copy_and_detach( + const c10::VariableVersion& version_counter, + bool allow_tensor_metadata_change) const override { + auto impl = c10::make_intrusive( + Storage(storage()), key_set(), data_type_, quantizer_); + copy_tensor_metadata( + /*src_q_impl=*/this, + /*dest_q_impl=*/impl.get(), + /*version_counter=*/version_counter, + /*allow_tensor_metadata_change=*/allow_tensor_metadata_change); + impl->refresh_numel(); + impl->refresh_contiguous(); + return impl; + } + + /** + * Return a TensorImpl that is a shallow-copy of this TensorImpl. + * + * For usage of `version_counter` and `allow_tensor_metadata_change`, + * see NOTE [ TensorImpl Shallow-Copying ]. 
+ */ + c10::intrusive_ptr<TensorImpl> shallow_copy_and_detach( + c10::VariableVersion&& version_counter, + bool allow_tensor_metadata_change) const override { + auto impl = c10::make_intrusive<QTensorImpl>( + Storage(storage()), key_set(), data_type_, quantizer_); + copy_tensor_metadata( + /*src_q_impl=*/this, + /*dest_q_impl=*/impl.get(), + /*version_counter=*/std::move(version_counter), + /*allow_tensor_metadata_change=*/allow_tensor_metadata_change); + impl->refresh_numel(); + impl->refresh_contiguous(); + return impl; + } + + /** + * Shallow-copies data from another TensorImpl into this TensorImpl. + * + * For why this function doesn't check this TensorImpl's `allow_tensor_metadata_change_`, + * see NOTE [ TensorImpl Shallow-Copying ]. + */ + void shallow_copy_from(const c10::intrusive_ptr<TensorImpl>& impl) override { + AT_ASSERT(has_compatible_shallow_copy_type(impl->key_set())); + auto q_impl = static_cast<const QTensorImpl*>(impl.get()); + copy_tensor_metadata( + /*src_q_impl=*/q_impl, + /*dest_q_impl=*/this, + /*version_counter=*/version_counter(), + /*allow_tensor_metadata_change=*/allow_tensor_metadata_change()); + refresh_numel(); + refresh_contiguous(); + } + + private: + QuantizerPtr quantizer_; + + const char* tensorimpl_type_name() const override; + + /** + * Copy the tensor metadata fields (e.g. sizes / strides / storage pointer / storage_offset) + * from one TensorImpl to another TensorImpl. + * + * For usage of `version_counter` and `allow_tensor_metadata_change`, see NOTE [ TensorImpl Shallow-Copying ]. + */ + static void copy_tensor_metadata( + const QTensorImpl* src_q_impl, + QTensorImpl* dest_q_impl, + const c10::VariableVersion& version_counter, + bool allow_tensor_metadata_change) { + TensorImpl::copy_tensor_metadata(src_q_impl, dest_q_impl, version_counter, allow_tensor_metadata_change); + + // QTensorImpl-specific fields. + dest_q_impl->quantizer_ = src_q_impl->quantizer_; + } +}; + +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/quantized/Quantizer.h b/phivenv/Lib/site-packages/torch/include/ATen/quantized/Quantizer.h new file mode 100644 index 0000000000000000000000000000000000000000..92e7bb6844f5a1e010174ad37c7a9f8928392e6a --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/quantized/Quantizer.h @@ -0,0 +1,279 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include + +namespace at { + +/** + * UnknownQuantizer is a placeholder quantizer for functions that implement + * quantization in a two-step process. First a tensor is allocated with an + * unknown quantizer, and then the quantization kernel decides what the final + * quantizer will be. + */ +struct TORCH_API UnknownQuantizer : public Quantizer { + explicit UnknownQuantizer(ScalarType scalar_type) + : Quantizer(scalar_type) {} + + Tensor quantize(const Tensor& tensor) override; + Tensor dequantize(const Tensor& qtensor) override; + Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override; + QScheme qscheme() const override; + bool equalTo(QuantizerPtr other) const override; +}; + +/** + * UniformQuantizer is the parent class for all uniform quantizers. + * These quantization schemes map float values uniformly to + * quantized values. For example, the affine quantizer is + * the most commonly used scheme in this category.
+ */ +struct TORCH_API UniformQuantizer : public Quantizer { + explicit UniformQuantizer(ScalarType scalar_type) : Quantizer(scalar_type) {} +}; + +/** + * NonUniformQuantizer is the parent class for all non-uniform quantizers. + * These quantization schemes may map float values non-uniformly to quantized + * values. K-means quantization is a representative example in this category. + */ +struct TORCH_API NonUniformQuantizer : public Quantizer { + explicit NonUniformQuantizer(ScalarType scalar_type) : Quantizer(scalar_type) {} +}; + +// There is also StochasticQuantizer, which is uniform but not affine + +/** + * AffineQuantizer uses an affine transformation to do quantization. + * + * For quantize: + * Y = clamp(round(X / scale + zero_point), min, max) + * For dequantize: + * X = (Y - zero_point) * scale + */ +struct TORCH_API AffineQuantizer : public UniformQuantizer { + explicit AffineQuantizer(ScalarType scalar_type) : UniformQuantizer(scalar_type) {} +}; + +// Note that we will not have a Symmetric Quantizer in the backend, to reduce +// complications in the quantized kernel implementation. + +/** + * PerTensorAffineQuantizer stores a scale and a zero_point, which are used for + * all the values in the Tensor. + */ +struct TORCH_API PerTensorAffineQuantizer : public AffineQuantizer { + explicit PerTensorAffineQuantizer(ScalarType scalar_type, double scale, int64_t zero_point) + : AffineQuantizer(scalar_type), + scale_(scale), + zero_point_(zero_point) {} + + Tensor quantize(const Tensor& tensor) override; + Tensor dequantize(const Tensor& qtensor) override; + Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override; + + QScheme qscheme() const override { + return kPerTensorAffine; + } + + double scale() const { + return scale_; + } + + int64_t zero_point() const { + return zero_point_; + } + + bool equalTo(QuantizerPtr other) const override { + if (!other.get() || other->qscheme() != kPerTensorAffine) { + return false; + } + auto* other_per_tensor_affine = + static_cast<PerTensorAffineQuantizer*>(other.get()); + return scalar_type() == other_per_tensor_affine->scalar_type() && + scale() == other_per_tensor_affine->scale() && + zero_point() == other_per_tensor_affine->zero_point(); + } + + private: + const double scale_; + // We use int64_t for consistency with Python + const int64_t zero_point_; +}; + +/** + * PerChannelAffineQuantizer is the same as PerTensorAffineQuantizer + * except that we have an independent scale and zero_point parameter + * for each channel. + * + * Also note that per-channel quantization is mostly applied to the output + * channels of weights, since per-input-channel weight quantization and + * per-channel activation quantization can't be supported efficiently on most + * processors: they would require each multiplication result within a single + * dot product to have a different scale.
+ */ +struct TORCH_API PerChannelAffineQuantizer : public AffineQuantizer { + explicit PerChannelAffineQuantizer( + ScalarType scalar_type, + Tensor scales, + Tensor zero_points, + int64_t axis) + : AffineQuantizer(scalar_type), + scales_(std::move(scales)), + zero_points_(std::move(zero_points)), + axis_(axis) {} + + QScheme qscheme() const override { + return kPerChannelAffine; + } + + Tensor scales() const { + return scales_; + } + + Tensor zero_points() const { + return zero_points_; + } + + int64_t axis() const { + return axis_; + } + + Tensor quantize(const Tensor& tensor) override; + Tensor dequantize(const Tensor& qtensor) override; + Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override; + + bool equalTo(QuantizerPtr other) const override { + if (!other.get() || other->qscheme() != kPerChannelAffine) { + return false; + } + auto* other_per_channel_affine = + static_cast(other.get()); + return scalar_type() == other_per_channel_affine->scalar_type() && + scales().equal(other_per_channel_affine->scales()) && + zero_points().equal(other_per_channel_affine->zero_points()) && + axis() == other_per_channel_affine->axis(); + } + + protected: + Tensor scales_; + Tensor zero_points_; + const int64_t axis_; +}; + +/** + * PerChannelAffineFloatQParamsQuantizer is the same as PerChannelAffineQuantizer + * except that it expects both scale and zero point to be floating point values. + * + * This quantizer uses the kPerChannelAffineFloatQParams qscheme which is a variant of + * kPerChannelAffine. + * + * The quantize equation in this case looks like - + * Xq = (Xf - zero_point) * inv_scale, where inv_scale = 1.0/scale + * + * Note: Usage of floating point zero point is useful in cases where 0 doesn't need to + * be exactly represented in the quantized space. We can get additional precision by + * using floating point values for zero point. + */ +struct TORCH_API PerChannelAffineFloatQParamsQuantizer : public PerChannelAffineQuantizer { + explicit PerChannelAffineFloatQParamsQuantizer( + ScalarType scalar_type, + Tensor scales, + Tensor zero_points, + int64_t axis) + : PerChannelAffineQuantizer(scalar_type, + scales, + zero_points, + axis) {} + + QScheme qscheme() const override { + return kPerChannelAffineFloatQParams; + } + + Tensor quantize(const Tensor& tensor) override; + Tensor dequantize(const Tensor& qtensor) override; + Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override; + + bool equalTo(QuantizerPtr other) const override { + if (!other.get() || other->qscheme() != kPerChannelAffineFloatQParams) { + return false; + } + auto* other_per_channel_float_qparams = + static_cast(other.get()); + return scalar_type() == other_per_channel_float_qparams->scalar_type() && + scales().equal(other_per_channel_float_qparams->scales()) && + zero_points().equal(other_per_channel_float_qparams->zero_points()) && + axis() == other_per_channel_float_qparams->axis(); + } +}; + +// This is an internal utility function for getting at the QTensorImpl, +// You should only use this for writing low level +// setters/getters for QTensorImpl fields; otherwise, you should use +// the low level setters/getters that were implemented using this. +// This may be called repeatedly, so make sure it's pretty cheap. 
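The affine formulas quoted above can be sanity-checked with plain scalar arithmetic. Below is a minimal self-contained sketch, assuming a qint8 target range of [-128, 127] and made-up scale/zero-point values:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Per-tensor affine quantization of a single value, following the formulas
// in the AffineQuantizer comment above (qint8 range [-128, 127] assumed).
int8_t quantize(float x, double scale, int64_t zero_point) {
  double q = std::nearbyint(x / scale + static_cast<double>(zero_point));
  return static_cast<int8_t>(std::clamp(q, -128.0, 127.0));
}

float dequantize(int8_t y, double scale, int64_t zero_point) {
  return static_cast<float>((y - zero_point) * scale);
}

int main() {
  double scale = 0.1;
  int64_t zp = 10;
  int8_t q = quantize(0.53f, scale, zp);  // round(5.3 + 10) = 15
  float x = dequantize(q, scale, zp);     // (15 - 10) * 0.1 = 0.5
  std::printf("q=%d back=%.2f\n", q, x);
}

The round trip returns 0.50 rather than 0.53; that 0.03 discrepancy is the quantization error, bounded by scale/2.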
+TORCH_API QTensorImpl* get_qtensorimpl(const TensorBase& self); + +// double and int64_t are because of the native function API, we only have these +// argument types right now in native functions +TORCH_API QuantizerPtr +make_per_tensor_affine_quantizer( + double scale, int64_t zero_point, ScalarType scalar_type); + +TORCH_API QuantizerPtr make_per_channel_affine_quantizer( + const Tensor& scales, + const Tensor& zero_points, + int64_t axis, + ScalarType scalar_type); + +TORCH_API QuantizerPtr make_unknown_quantizer(ScalarType scalar_type); + +// Create a Quantized Tensor given arguments for normal Tensor and a quantizer +TORCH_API Tensor new_qtensor( + IntArrayRef sizes, + const TensorOptions& options, + QuantizerPtr quantizer); + +TORCH_API void set_quantizer_(const Tensor& self, ConstQuantizerPtr quantizer); + +TORCH_API Tensor from_blob_quantized_per_tensor_affine( + void* data, + IntArrayRef sizes, + IntArrayRef strides, + std::function deleter, + const float scale, + const int64_t zeroPoint, + const TensorOptions& options); + +TORCH_API Tensor from_blob_quantized_per_tensor_affine( + void* data, + IntArrayRef sizes, + std::function deleter, + const float scale, + const int64_t zeroPoint, + const TensorOptions& options); + +TORCH_API Tensor from_blob_quantized_per_channel_affine( + void* data, + IntArrayRef sizes, + std::function deleter, + const Tensor& scales, + const Tensor& zero_points, + const int64_t axis, + const TensorOptions& options); + +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/xpu/CachingHostAllocator.h b/phivenv/Lib/site-packages/torch/include/ATen/xpu/CachingHostAllocator.h new file mode 100644 index 0000000000000000000000000000000000000000..1cb02e7713ac15d3778c59de5de475120194ca36 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/xpu/CachingHostAllocator.h @@ -0,0 +1,38 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace at::xpu { + +C10_DEPRECATED_MESSAGE( + "at::xpu::getCachingHostAllocator() is deprecated. Please use at::getHostAllocator(at::kXPU) instead.") +inline TORCH_XPU_API at::HostAllocator* getCachingHostAllocator() { + return at::getHostAllocator(at::kXPU); +} + +C10_DEPRECATED_MESSAGE( + "at::xpu::CachingHostAllocator_recordEvent(...) is deprecated. Please use at::getHostAllocator(at::kXPU)->record_event(...) instead.") +inline TORCH_XPU_API bool CachingHostAllocator_recordEvent( + void* ptr, + void* ctx, + c10::xpu::XPUStream stream) { + return getHostAllocator(at::kXPU)->record_event(ptr, ctx, stream.unwrap()); +} + +C10_DEPRECATED_MESSAGE( + "at::xpu::CachingHostAllocator_emptyCache() is deprecated. Please use at::getHostAllocator(at::kXPU)->empty_cache() instead.") +inline TORCH_XPU_API void CachingHostAllocator_emptyCache() { + getHostAllocator(at::kXPU)->empty_cache(); +} + +C10_DEPRECATED_MESSAGE( + "at::xpu::HostAlloc(...) is deprecated. Please use at::getHostAllocator(at::kXPU)->allocate(...) 
instead.") +inline TORCH_XPU_API at::DataPtr HostAlloc(size_t size) { + return getHostAllocator(at::kXPU)->allocate(size); +} + +} // namespace at::xpu diff --git a/phivenv/Lib/site-packages/torch/include/ATen/xpu/PinnedMemoryAllocator.h b/phivenv/Lib/site-packages/torch/include/ATen/xpu/PinnedMemoryAllocator.h new file mode 100644 index 0000000000000000000000000000000000000000..856e37312712ca5617606b89df94ca5621532442 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/xpu/PinnedMemoryAllocator.h @@ -0,0 +1,11 @@ +#pragma once + +#include +#include + +namespace at::xpu { + +inline TORCH_XPU_API at::HostAllocator* getPinnedMemoryAllocator() { + return at::getHostAllocator(at::kXPU); +} +} // namespace at::xpu diff --git a/phivenv/Lib/site-packages/torch/include/ATen/xpu/XPUContext.h b/phivenv/Lib/site-packages/torch/include/ATen/xpu/XPUContext.h new file mode 100644 index 0000000000000000000000000000000000000000..31719ae1159fdcbeb504cfc8bd333ac492918c16 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/xpu/XPUContext.h @@ -0,0 +1,20 @@ +#pragma once + +#include +#include +#include + +namespace at::xpu { + +// XPU is available if we compiled with XPU. +inline bool is_available() { + return c10::xpu::device_count() > 0; +} + +TORCH_XPU_API DeviceProp* getCurrentDeviceProperties(); + +TORCH_XPU_API DeviceProp* getDeviceProperties(DeviceIndex device); + +TORCH_XPU_API int32_t getGlobalIdxFromDevice(DeviceIndex device); + +} // namespace at::xpu diff --git a/phivenv/Lib/site-packages/torch/include/ATen/xpu/XPUDevice.h b/phivenv/Lib/site-packages/torch/include/ATen/xpu/XPUDevice.h new file mode 100644 index 0000000000000000000000000000000000000000..c59f4808f1c5a9503ed875711fb828f67e4689ce --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/xpu/XPUDevice.h @@ -0,0 +1,13 @@ +#pragma once + +#include +#include + +namespace at::xpu { + +inline Device getDeviceFromPtr(void* ptr) { + auto device = c10::xpu::get_device_idx_from_pointer(ptr); + return {c10::DeviceType::XPU, device}; +} + +} // namespace at::xpu diff --git a/phivenv/Lib/site-packages/torch/include/ATen/xpu/XPUEvent.h b/phivenv/Lib/site-packages/torch/include/ATen/xpu/XPUEvent.h new file mode 100644 index 0000000000000000000000000000000000000000..fd93fb6fbd5fa090362a0b2008c14307ee7ab192 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/xpu/XPUEvent.h @@ -0,0 +1,191 @@ +#pragma once +#include + +#include + +namespace at::xpu { + +/* + * XPUEvent are movable not copyable wrappers around SYCL event. XPUEvent are + * constructed lazily when first recorded. It has a device, and this device is + * acquired from the first recording stream. Later streams that record the event + * must match the same device. + * + * Currently, XPUEvent does NOT support to export an inter-process event from + * another process via inter-process comunication(IPC). So it means that + * inter-process communication for event handles between different processes is + * not available. This could impact some applications that rely on cross-process + * synchronization and communication. 
+ */ +struct TORCH_XPU_API XPUEvent { + // Constructors + XPUEvent(bool enable_timing = false) noexcept + : enable_timing_{enable_timing} {} + + ~XPUEvent() { + if (isCreated()) { + const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); + if (C10_UNLIKELY(interp)) { + (*interp)->trace_gpu_event_deletion( + at::kXPU, reinterpret_cast<uintptr_t>(event_.get())); + } + } + } + + XPUEvent(const XPUEvent&) = delete; + XPUEvent& operator=(const XPUEvent&) = delete; + + XPUEvent(XPUEvent&& other) = default; + XPUEvent& operator=(XPUEvent&& other) = default; + + operator sycl::event&() const { + return event(); + } + + std::optional<at::Device> device() const { + if (isCreated()) { + return at::Device(at::kXPU, device_index_); + } else { + return std::nullopt; + } + } + + inline bool isCreated() const { + return (event_.get() != nullptr); + } + + DeviceIndex device_index() const { + return device_index_; + } + + sycl::event& event() const { + return *event_; + } + + bool query() const { + using namespace sycl::info; + if (!isCreated()) { + return true; + } + + return event().get_info<event::command_execution_status>() == + event_command_status::complete; + } + + void record() { + record(getCurrentXPUStream()); + } + + void recordOnce(const XPUStream& stream) { + if (!isCreated()) { + record(stream); + } + } + + void record(const XPUStream& stream) { + if (!isCreated()) { + device_index_ = stream.device_index(); + assignEvent(stream.queue()); + const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); + if (C10_UNLIKELY(interp)) { + (*interp)->trace_gpu_event_creation( + at::kXPU, reinterpret_cast<uintptr_t>(event_.get())); + } + } else { + TORCH_CHECK( + device_index_ == stream.device_index(), + "Event device ", + device_index_, + " does not match recording stream's device ", + stream.device_index(), + "."); + reassignEvent(stream.queue()); + } + const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); + if (C10_UNLIKELY(interp)) { + (*interp)->trace_gpu_event_record( + at::kXPU, + reinterpret_cast<uintptr_t>(event_.get()), + reinterpret_cast<uintptr_t>(&stream.queue())); + } + } + + void block(const XPUStream& stream) { + if (isCreated()) { + std::vector<sycl::event> event_list{event()}; + // Make this stream wait until event_ is completed. + stream.queue().ext_oneapi_submit_barrier(event_list); + const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); + if (C10_UNLIKELY(interp)) { + (*interp)->trace_gpu_event_wait( + at::kXPU, + reinterpret_cast<uintptr_t>(event_.get()), + reinterpret_cast<uintptr_t>(&stream.queue())); + } + } + } + + double elapsed_time(const XPUEvent& other) const { + TORCH_CHECK( + isCreated() && other.isCreated(), + "Both events must be recorded before calculating elapsed time."); + TORCH_CHECK( + query() && other.query(), + "Both events must be completed before calculating elapsed time."); + TORCH_CHECK( + enable_timing_ && other.enable_timing_, + "Both events must be created with argument 'enable_timing=True'."); + +#if SYCL_COMPILER_VERSION < 20250000 + TORCH_CHECK_NOT_IMPLEMENTED( + false, + "elapsed_time of XPUEvent requires PyTorch to be built with SYCL compiler version 2025.0.0 or newer."); +#endif + + using namespace sycl::info::event_profiling; + // Block until both of the recorded events are completed. + uint64_t end_time_ns = other.event().get_profiling_info<command_end>(); + uint64_t start_time_ns = event().get_profiling_info<command_end>(); + // Return the elapsed time in milliseconds.
+ return 1e-6 * + (static_cast(end_time_ns) - static_cast(start_time_ns)); + } + + void synchronize() const { + if (isCreated()) { + const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); + if (C10_UNLIKELY(interp)) { + (*interp)->trace_gpu_event_synchronization( + at::kXPU, reinterpret_cast(event_.get())); + } + event().wait_and_throw(); + } + } + + private: + void assignEvent(sycl::queue& queue) { +#if SYCL_COMPILER_VERSION >= 20250000 + if (enable_timing_) { + event_ = std::make_unique( + sycl::ext::oneapi::experimental::submit_profiling_tag(queue)); + } else { + event_ = std::make_unique(queue.ext_oneapi_submit_barrier()); + } +#else + event_ = std::make_unique(queue.ext_oneapi_submit_barrier()); +#endif + } + + void reassignEvent(sycl::queue& queue) { + event_.reset(); + assignEvent(queue); + } + + bool enable_timing_ = false; + DeviceIndex device_index_ = -1; + // Only need to track the last event, as events in an in-order queue are + // executed sequentially. + std::unique_ptr event_; +}; + +} // namespace at::xpu diff --git a/phivenv/Lib/site-packages/torch/include/ATen/xpu/XPUGeneratorImpl.h b/phivenv/Lib/site-packages/torch/include/ATen/xpu/XPUGeneratorImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..7af22d180f04c78ecdf310bdac98b7d2af2fd5d5 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/xpu/XPUGeneratorImpl.h @@ -0,0 +1,39 @@ +#pragma once + +#include + +namespace at { + +struct TORCH_XPU_API XPUGeneratorImpl : public GeneratorImpl { + // Constructors + XPUGeneratorImpl(DeviceIndex device_index = -1); + ~XPUGeneratorImpl() override = default; + + // XPUGeneratorImpl methods + std::shared_ptr clone() const; + void set_current_seed(uint64_t seed) override; + void set_offset(uint64_t offset) override; + uint64_t get_offset() const override; + uint64_t current_seed() const override; + uint64_t seed() override; + void set_state(const c10::TensorImpl& new_state) override; + c10::intrusive_ptr get_state() const override; + void set_philox_offset_per_thread(uint64_t offset); + uint64_t philox_offset_per_thread() const; + std::pair philox_engine_inputs(uint64_t increment); + static c10::DeviceType device_type(); + + private: + XPUGeneratorImpl* clone_impl() const override; + uint64_t seed_ = default_rng_seed_val; + uint64_t philox_offset_per_thread_ = 0; +}; + +namespace xpu::detail { + +TORCH_XPU_API const Generator& getDefaultXPUGenerator(DeviceIndex device = -1); + +TORCH_XPU_API Generator createXPUGenerator(DeviceIndex device = -1); + +} // namespace xpu::detail +} // namespace at diff --git a/phivenv/Lib/site-packages/torch/include/ATen/xpu/detail/XPUHooks.h b/phivenv/Lib/site-packages/torch/include/ATen/xpu/detail/XPUHooks.h new file mode 100644 index 0000000000000000000000000000000000000000..223e6dc8c9daa4650a83ffc56f871c40b74fcbe5 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/ATen/xpu/detail/XPUHooks.h @@ -0,0 +1,33 @@ +#pragma once + +#include + +namespace at::xpu::detail { + +// The real implementation of XPUHooksInterface +struct XPUHooks : public at::XPUHooksInterface { + XPUHooks(at::XPUHooksArgs) {} + void init() const override; + bool hasXPU() const override; + std::string showConfig() const override; + int32_t getGlobalIdxFromDevice(const at::Device& device) const override; + const Generator& getDefaultGenerator( + DeviceIndex device_index = -1) const override; + Generator getNewGenerator(DeviceIndex device_index = -1) const override; + Device getDeviceFromPtr(void* data) const override; + 
c10::DeviceIndex getNumGPUs() const override; + DeviceIndex current_device() const override; + void deviceSynchronize(DeviceIndex device_index) const override; + Allocator* getPinnedMemoryAllocator() const override; + + bool isBuilt() const override { + return true; + } + bool isAvailable() const override; + bool isPinnedPtr(const void* data) const override; + bool hasPrimaryContext(DeviceIndex device_index) const override; + DeviceIndex deviceCount() const override; + DeviceIndex getCurrentDevice() const override; +}; + +} // namespace at::xpu::detail diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/Allocator.h b/phivenv/Lib/site-packages/torch/include/c10/core/Allocator.h new file mode 100644 index 0000000000000000000000000000000000000000..ab39e872c89de0c72c83607c7a777ff2a4ea489e --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/Allocator.h @@ -0,0 +1,438 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace c10 { + +// A DataPtr is a unique pointer (with an attached deleter and some +// context for the deleter) to some memory, which also records what +// device is for its data. +// +// nullptr DataPtrs can still have a nontrivial device; this allows +// us to treat zero-size allocations uniformly with non-zero allocations. +// +class C10_API DataPtr { + private: + c10::detail::UniqueVoidPtr ptr_; + Device device_; + + public: + // Choice of CPU here is arbitrary; if there's an "undefined" device + // we could use that too + DataPtr() : device_(DeviceType::CPU) {} + DataPtr(void* data, Device device) : ptr_(data), device_(device) {} + DataPtr(void* data, void* ctx, DeleterFnPtr ctx_deleter, Device device) + : ptr_(data, ctx, ctx_deleter), device_(device) {} + void* operator->() const { + return ptr_.get(); + } + C10_ALWAYS_INLINE bool /* success */ unsafe_reset_data_and_ctx( + void* new_data_and_ctx) { + return ptr_.unsafe_reset_data_and_ctx(new_data_and_ctx); + } + void clear() { + ptr_.clear(); + } + void* get() const { + return ptr_.get(); + } + void* mutable_get() { + return ptr_.get(); + } + void* get_context() const { + return ptr_.get_context(); + } + void* release_context() { + return ptr_.release_context(); + } + std::unique_ptr&& move_context() { + return ptr_.move_context(); + } + operator bool() const { + return static_cast(ptr_); + } + template + T* cast_context(DeleterFnPtr expected_deleter) const { + return ptr_.cast_context(expected_deleter); + } + DeleterFnPtr get_deleter() const { + return ptr_.get_deleter(); + } + /** + * Compare the deleter in a DataPtr to expected_deleter. + * If it matches, replace the deleter with new_deleter + * and return true; otherwise, does nothing and returns + * false. + * + * In general, it is not safe to unconditionally set the + * deleter on a DataPtr, because you don't know what + * the deleter is, and thus will have a hard time properly + * disposing of the deleter without storing the original + * deleter (this is difficult to do, because DeleterFnPtr + * is not a closure, and because the context on DataPtr is + * only a single word, you generally don't have enough + * space to store both the original deleter and its context). + * However, in some cases, you know /exactly/ what the deleter + * is, and you have a new deleter that manually wraps + * the old one. In this case, you can safely swap the deleter + * after asserting that the deleters line up. 
+ * + * What are the requirements on new_deleter? It must still + * properly dispose of the void* pointer passed in as its argument, + * where void* is whatever the context of the original deleter + * is. So in general, you expect the new deleter to look something + * like this: + * + * [](void* ptr) { + * some_new_stuff(ptr); + * get_orig_allocator()->raw_deleter(ptr); + * } + * + * Note that it won't work to close over the original + * allocator; you don't have enough space to do that! Also, + * it's unsafe to assume that the pointer passed in is the + * memory pointer in question; it might not be; be sure to + * read the source code of the Allocator + * in question to confirm this. + */ + [[nodiscard]] bool compare_exchange_deleter( + DeleterFnPtr expected_deleter, + DeleterFnPtr new_deleter) { + return ptr_.compare_exchange_deleter(expected_deleter, new_deleter); + } + Device device() const { + return device_; + } + // Unsafely mutates the device on a DataPtr. Under normal use, + // you should never actually need to call this function. + // We need this for the implementation of the hack detailed + // in Note [Masquerading as CUDA] + void unsafe_set_device(Device device) { + device_ = device; + } +}; + +// NB: Device is NOT tested for here; a CUDA nullptr is as much a nullptr as a +// CPU nullptr + +inline bool operator==(const DataPtr& dp, std::nullptr_t) noexcept { + return !dp; +} +inline bool operator==(std::nullptr_t, const DataPtr& dp) noexcept { + return !dp; +} +inline bool operator!=(const DataPtr& dp, std::nullptr_t) noexcept { + return dp; +} +inline bool operator!=(std::nullptr_t, const DataPtr& dp) noexcept { + return dp; +} + +// Note [raw_allocate/raw_deallocate and Thrust] +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +// Thrust's support for custom allocators requires us to write something +// like this: +// +// class ThrustAllocator { +// char* allocate(size_t); +// void deallocate(char*, size_t); +// }; +// +// This is not good for our unique_ptr-based allocator interface, as +// there is no way to get to the context when we free. +// +// However, in some cases the context is exactly the same as +// the data pointer. In this case, we can support the "raw" +// allocate and deallocate interface. This is what +// raw_deleter signifies. By default, it returns a nullptr, which means that +// the raw interface is not implemented. Be sure to implement it whenever +// possible, or the raw interface will incorrectly be reported as unsupported +// when it is actually possible. + +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) +struct C10_API Allocator { + virtual ~Allocator() = default; + + virtual DataPtr allocate(size_t n) = 0; + + // Clones an allocation that came from this allocator. + // + // To perform the copy, this function calls `copy_data`, which + // must be implemented by derived classes. + // + // Note that this explicitly ignores any context that may have been + // attached to the input data. + // + // Requires: input data was allocated by the same allocator. + DataPtr clone(const void* data, std::size_t n); + + // Checks if DataPtr has a simple context, not wrapped in any + // out-of-the-ordinary contexts. + virtual bool is_simple_data_ptr(const DataPtr& data_ptr) const; + + // If this returns a non-null pointer, it means that allocate() + // is guaranteed to return a unique_ptr with this deleter attached; + // it means the raw_allocate and raw_deallocate APIs are safe to use. + // This function MUST always return the same BoundDeleter.
+ virtual DeleterFnPtr raw_deleter() const { + return nullptr; + } + void* raw_allocate(size_t n) { + auto dptr = allocate(n); + AT_ASSERT(dptr.get() == dptr.get_context()); + return dptr.release_context(); + } + void raw_deallocate(void* ptr) { + auto d = raw_deleter(); + AT_ASSERT(d); + d(ptr); + } + + // Copies data from one allocation to another. + // Pure virtual, so derived classes must define behavior. + // Derived class implementation can simply call `default_copy_data` + // to use `std::memcpy`. + // + // Requires: src and dest were allocated by this allocator + // Requires: src and dest both have length >= count + virtual void copy_data(void* dest, const void* src, std::size_t count) + const = 0; + + protected: + // Uses `std::memcpy` to copy data. + // Child classes can use this as `copy_data` when an alternative copy + // API is not needed. + void default_copy_data(void* dest, const void* src, std::size_t count) const; +}; + +// This context is used to generate DataPtr which have arbitrary +// std::function deleters associated with them. In some user facing +// functions, we give a (user-friendly) interface for constructing +// tensors from external data which take an arbitrary std::function +// deleter. Grep for InefficientStdFunctionContext to find these +// occurrences. +// +// This context is inefficient because we have to do a dynamic +// allocation InefficientStdFunctionContext, on top of the dynamic +// allocation which is implied by std::function itself. +struct C10_API InefficientStdFunctionContext { + void* ptr_{nullptr}; + std::function deleter_; + InefficientStdFunctionContext(void* ptr, std::function deleter) + : ptr_(ptr), deleter_(std::move(deleter)) {} + InefficientStdFunctionContext(const InefficientStdFunctionContext&) = delete; + InefficientStdFunctionContext(InefficientStdFunctionContext&& rhs) noexcept + : ptr_(std::exchange(rhs.ptr_, nullptr)), + deleter_(std::move(rhs.deleter_)) {} + InefficientStdFunctionContext& operator=( + const InefficientStdFunctionContext&) = delete; + // NOLINTNEXTLINE(*-noexcept-move-*) + InefficientStdFunctionContext& operator=( + InefficientStdFunctionContext&& rhs) { + this->~InefficientStdFunctionContext(); + ptr_ = std::exchange(rhs.ptr_, nullptr); + deleter_ = std::move(rhs.deleter_); + return *this; + } + ~InefficientStdFunctionContext() { + if (deleter_) { + deleter_(ptr_); + } + } + static DataPtr makeDataPtr( + void* ptr, + std::function deleter, + Device device); +}; + +/** Set the allocator for DeviceType `t`. The passed in allocator pointer is + * expected to have static lifetime; this function does NOT take ownership + * of the raw pointer. (The reason for this is to prevent existing pointers + * to an allocator of a particular device from being invalidated when + * SetAllocator is called.) + * + * Also note that this is not thread-safe, and we assume this function will + * only be called during initialization. + * + * The 'priority' flag is introduced when we want to overwrite the default + * allocator, since the allocators are set statically. The default priority + * is 0, which means the lowest. Only higher or equal priority can overwrite + * existing ones. 
+ */
+C10_API void SetAllocator(DeviceType t, Allocator* alloc, uint8_t priority = 0);
+C10_API Allocator* GetAllocator(const DeviceType& t);
+
+template <DeviceType t>
+struct AllocatorRegisterer {
+  explicit AllocatorRegisterer(Allocator* alloc) {
+    SetAllocator(t, alloc);
+  }
+};
+
+#define REGISTER_ALLOCATOR(t, f)                       \
+  namespace {                                          \
+  static c10::AllocatorRegisterer<t> g_allocator_d(f); \
+  }
+
+// An interface for reporting thread local memory usage
+// per device
+struct C10_API MemoryReportingInfoBase : public c10::DebugInfoBase {
+  /**
+   * alloc_size corresponds to the size of the ptr.
+   *
+   * total_allocated corresponds to total allocated memory.
+   *
+   * total_reserved corresponds to total size of memory pool, both used and
+   * unused, if applicable.
+   */
+  virtual void reportMemoryUsage(
+      void* ptr,
+      int64_t alloc_size,
+      size_t total_allocated,
+      size_t total_reserved,
+      Device device) = 0;
+
+  virtual void reportOutOfMemory(
+      int64_t alloc_size,
+      size_t total_allocated,
+      size_t total_reserved,
+      Device device);
+
+  virtual bool memoryProfilingEnabled() const = 0;
+};
+
+C10_API bool memoryProfilingEnabled();
+C10_API void reportMemoryUsageToProfiler(
+    void* ptr,
+    int64_t alloc_size,
+    size_t total_allocated,
+    size_t total_reserved,
+    Device device);
+
+C10_API void reportOutOfMemoryToProfiler(
+    int64_t alloc_size,
+    size_t total_allocated,
+    size_t total_reserved,
+    Device device);
+
+// used to hold traceback information in allocators
+// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions)
+struct GatheredContext {
+  virtual ~GatheredContext() = default;
+};
+
+namespace CachingAllocator {
+struct Stat {
+  void increase(size_t amount) {
+    current += static_cast<int64_t>(amount);
+    peak = std::max(current, peak);
+    allocated += static_cast<int64_t>(amount);
+  }
+
+  void decrease(size_t amount) {
+    current -= static_cast<int64_t>(amount);
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
+        current >= 0,
+        "Negative tracked stat in device allocator (likely logic error).");
+    freed += static_cast<int64_t>(amount);
+  }
+
+  void reset_accumulated() {
+    allocated = 0;
+    freed = 0;
+  }
+
+  void reset_peak() {
+    peak = current;
+  }
+
+  int64_t current = 0;
+  int64_t peak = 0;
+  int64_t allocated = 0;
+  int64_t freed = 0;
+};
+
+enum struct StatType : uint64_t {
+  AGGREGATE = 0,
+  SMALL_POOL = 1,
+  LARGE_POOL = 2,
+  NUM_TYPES = 3 // remember to update this whenever a new stat type is added
+};
+
+using StatArray = std::array<Stat, static_cast<size_t>(StatType::NUM_TYPES)>;
+using StatTypes = std::array<bool, static_cast<size_t>(StatType::NUM_TYPES)>;
+
+template <typename Func>
+void for_each_selected_stat_type(const StatTypes& stat_types, Func f) {
+  for (const auto stat_type : c10::irange(stat_types.size())) {
+    if (stat_types[stat_type]) {
+      f(stat_type);
+    }
+  }
+}
+
+// Structure for keeping timing information
+struct DurationStat {
+  void increase(int64_t amount) {
+    total += amount;
+    count += 1;
+    max = std::max(amount, max);
+    if (min == 0) {
+      min = amount;
+    } else {
+      min = std::min(amount, min);
+    }
+  }
+
+  void reset_accumulated() {
+    total = 0;
+    count = 0;
+  }
+
+  void reset_peak() {
+    min = 0;
+    max = 0;
+  }
+
+  int64_t total = 0;
+  int64_t max = 0;
+  int64_t min = 0;
+  int64_t count = 0;
+};
+
+// Size pretty-printer
+inline std::string format_size(uint64_t size) {
+  std::ostringstream os;
+  os.precision(2);
+  os << std::fixed;
+  if (size <= 1024) {
+    os << size << " bytes";
+  } else if (size <= 1048576) {
+    os << (static_cast<double>(size) / 1024.0);
+    os << " KiB";
+  } else if (size <= 1073741824ULL) {
+    os << static_cast<double>(size) / 1048576.0;
+    os << " MiB";
+  } else {
+    os << static_cast<double>(size)
/ 1073741824.0; + os << " GiB"; + } + return os.str(); +} + +} // namespace CachingAllocator +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/AutogradState.h b/phivenv/Lib/site-packages/torch/include/c10/core/AutogradState.h new file mode 100644 index 0000000000000000000000000000000000000000..37dcca24aee70589084dde18afa9ffd7207d2d3c --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/AutogradState.h @@ -0,0 +1,73 @@ +#pragma once + +#include + +namespace c10 { + +// Structure used to pack all the thread local boolean +// flags used by autograd +struct C10_API AutogradState { + static AutogradState& get_tls_state(); + static void set_tls_state(AutogradState state); + + AutogradState( + bool grad_mode, + bool inference_mode, + bool fw_grad_mode, + bool multithreading_enabled) + : grad_mode_(grad_mode), + inference_mode_(inference_mode), + fw_grad_mode_(fw_grad_mode), + multithreading_enabled_(multithreading_enabled), + view_replay_enabled_(false) {} + + void set_grad_mode(bool enabled) { + grad_mode_ = enabled; + } + + void set_fw_grad_mode(bool enabled) { + fw_grad_mode_ = enabled; + } + + void set_inference_mode(bool enabled) { + inference_mode_ = enabled; + } + + void set_multithreading_enabled(bool multithreading_enabled) { + multithreading_enabled_ = multithreading_enabled; + } + + void set_view_replay_enabled(bool view_replay_enabled) { + view_replay_enabled_ = view_replay_enabled; + } + + bool get_grad_mode() const { + return grad_mode_; + } + + bool get_fw_grad_mode() const { + return fw_grad_mode_; + } + + bool get_inference_mode() const { + return inference_mode_; + } + + bool get_multithreading_enabled() const { + return multithreading_enabled_; + } + + bool get_view_replay_enabled() const { + return view_replay_enabled_; + } + + private: + bool grad_mode_ : 1; + bool inference_mode_ : 1; + bool fw_grad_mode_ : 1; + bool multithreading_enabled_ : 1; + // NOLINTNEXTLINE(cppcoreguidelines-use-default-member-init) + bool view_replay_enabled_ : 1; +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/Backend.h b/phivenv/Lib/site-packages/torch/include/c10/core/Backend.h new file mode 100644 index 0000000000000000000000000000000000000000..9e0fc97a67c93de9a03cfe630b0d8603afd27cf5 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/Backend.h @@ -0,0 +1,387 @@ +#pragma once + +#include +#include +#include +#include + +#include + +namespace c10 { + +/** + * This legacy enum class defines the set of backends supported by old school, + * code generated Type-based ATen. A "backend" in this sense roughly + * corresponds to the cartesian product of (device type, layout), but restricted + * only to combinations which we actually have kernels for. Backend does NOT + * include dtype. + * + * The reason we are sunsetting this enum class is because it doesn't allow for + * open registration; e.g., if you want to add SparseXLA, you'd have to + * edit this enum; you wouldn't be able to do it out of tree. DispatchKey is + * the replacement for Backend which supports open registration. + * + * NB: The concept of 'Backend' here disagrees with the notion of backend + * exposed to users in torch.backends. Backend here is something like "CPU" + * or "SparseCUDA"; backend in torch.backends is something like "MKL" or + * "CUDNN". 
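+ *
+ * For example (an illustrative note, not part of the original comment),
+ * Backend::SparseCUDA names the (CUDA device, sparse COO layout) combination;
+ * the dtype of the tensor is tracked separately.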
+ */ +enum class Backend { + CPU, + CUDA, + HIP, + VE, + FPGA, + IPU, + XPU, + SparseCPU, + SparseCUDA, + SparseCsrCPU, + SparseCsrCUDA, + SparseHIP, + SparseVE, + SparseXPU, + SparsePrivateUse1, + SparseCsrHIP, + SparseCsrVE, + SparseCsrXPU, + SparseCsrPrivateUse1, + MAIA, + XLA, + Vulkan, + Metal, + Meta, + QuantizedCPU, + QuantizedCUDA, + QuantizedXPU, + QuantizedPrivateUse1, + Undefined, + MkldnnCPU, + MPS, + HPU, + Lazy, + MTIA, + PrivateUse1, + NumOptions +}; + +inline Backend dispatchKeyToBackend(DispatchKey t) { + if (t == DispatchKey::CPU || t == DispatchKey::AutogradCPU) { + return Backend::CPU; + } else if (t == DispatchKey::CUDA || t == DispatchKey::AutogradCUDA) { + return Backend::CUDA; + } else if (t == DispatchKey::HIP) { + return Backend::HIP; + } else if (t == DispatchKey::VE) { + return Backend::VE; + } else if (t == DispatchKey::FPGA) { + return Backend::FPGA; + } else if (t == DispatchKey::MAIA || t == DispatchKey::AutogradMAIA) { + return Backend::MAIA; + } else if (t == DispatchKey::XLA || t == DispatchKey::AutogradXLA) { + return Backend::XLA; + } else if (t == DispatchKey::Lazy || t == DispatchKey::AutogradLazy) { + return Backend::Lazy; + } else if (t == DispatchKey::MPS || t == DispatchKey::AutogradMPS) { + return Backend::MPS; + } else if (t == DispatchKey::Vulkan) { + return Backend::Vulkan; + } else if (t == DispatchKey::Metal) { + return Backend::Metal; + } else if (t == DispatchKey::Meta) { + return Backend::Meta; + } else if (t == DispatchKey::SparseCPU) { + return Backend::SparseCPU; + } else if (t == DispatchKey::SparseCUDA) { + return Backend::SparseCUDA; + } else if (t == DispatchKey::SparseHIP) { + return Backend::SparseHIP; + } else if (t == DispatchKey::SparseVE) { + return Backend::SparseVE; + } else if (t == DispatchKey::SparsePrivateUse1) { + return Backend::SparsePrivateUse1; + } else if (t == DispatchKey::SparseCsrCPU) { + return Backend::SparseCsrCPU; + } else if (t == DispatchKey::SparseCsrCUDA) { + return Backend::SparseCsrCUDA; + } else if (t == DispatchKey::SparseCsrHIP) { + return Backend::SparseCsrHIP; + } else if (t == DispatchKey::SparseCsrVE) { + return Backend::SparseCsrVE; + } else if (t == DispatchKey::SparseCsrPrivateUse1) { + return Backend::SparseCsrPrivateUse1; + } else if (t == DispatchKey::MkldnnCPU) { + return Backend::MkldnnCPU; + } else if (t == DispatchKey::QuantizedCPU) { + return Backend::QuantizedCPU; + } else if (t == DispatchKey::QuantizedCUDA) { + return Backend::QuantizedCUDA; + } else if (t == DispatchKey::IPU || t == DispatchKey::AutogradIPU) { + return Backend::IPU; + } else if (t == DispatchKey::XPU || t == DispatchKey::AutogradXPU) { + return Backend::XPU; + } else if (t == DispatchKey::SparseXPU) { + return Backend::SparseXPU; + } else if (t == DispatchKey::SparseCsrXPU) { + return Backend::SparseCsrXPU; + } else if (t == DispatchKey::QuantizedXPU) { + return Backend::QuantizedXPU; + } else if (t == DispatchKey::QuantizedPrivateUse1) { + return Backend::QuantizedPrivateUse1; + } else if (t == DispatchKey::HPU || t == DispatchKey::AutogradHPU) { + return Backend::HPU; + } else if (t == DispatchKey::MTIA || t == DispatchKey::AutogradMTIA) { + return Backend::MTIA; + } else if ( + t == DispatchKey::PrivateUse1 || t == DispatchKey::AutogradPrivateUse1) { + return Backend::PrivateUse1; + } else if (t == DispatchKey::Undefined) { + return Backend::Undefined; + } else { + TORCH_CHECK(false, "Unrecognized tensor type ID: ", t); + } +} + +inline DispatchKey backendToDispatchKey(Backend b) { + switch (b) { + case 
Backend::CPU: + return DispatchKey::CPU; + case Backend::CUDA: + return DispatchKey::CUDA; + case Backend::HIP: + return DispatchKey::HIP; + case Backend::VE: + return DispatchKey::VE; + case Backend::FPGA: + return DispatchKey::FPGA; + case Backend::MAIA: + return DispatchKey::MAIA; + case Backend::XLA: + return DispatchKey::XLA; + case Backend::Lazy: + return DispatchKey::Lazy; + case Backend::IPU: + return DispatchKey::IPU; + case Backend::XPU: + return DispatchKey::XPU; + case Backend::SparseXPU: + return DispatchKey::SparseXPU; + case Backend::SparseCsrXPU: + return DispatchKey::SparseCsrXPU; + case Backend::SparseCPU: + return DispatchKey::SparseCPU; + case Backend::SparseCUDA: + return DispatchKey::SparseCUDA; + case Backend::SparseHIP: + return DispatchKey::SparseHIP; + case Backend::SparseVE: + return DispatchKey::SparseVE; + case Backend::SparsePrivateUse1: + return DispatchKey::SparsePrivateUse1; + case Backend::SparseCsrCPU: + return DispatchKey::SparseCsrCPU; + case Backend::SparseCsrCUDA: + return DispatchKey::SparseCsrCUDA; + case Backend::SparseCsrHIP: + return DispatchKey::SparseCsrHIP; + case Backend::SparseCsrVE: + return DispatchKey::SparseCsrVE; + case Backend::SparseCsrPrivateUse1: + return DispatchKey::SparseCsrPrivateUse1; + case Backend::MkldnnCPU: + return DispatchKey::MkldnnCPU; + case Backend::Vulkan: + return DispatchKey::Vulkan; + case Backend::Metal: + return DispatchKey::Metal; + case Backend::Meta: + return DispatchKey::Meta; + case Backend::QuantizedCPU: + return DispatchKey::QuantizedCPU; + case Backend::QuantizedCUDA: + return DispatchKey::QuantizedCUDA; + case Backend::QuantizedPrivateUse1: + return DispatchKey::QuantizedPrivateUse1; + case Backend::Undefined: + return DispatchKey::Undefined; + case Backend::MPS: + return DispatchKey::MPS; + case Backend::HPU: + return DispatchKey::HPU; + case Backend::MTIA: + return DispatchKey::MTIA; + case Backend::PrivateUse1: + return DispatchKey::PrivateUse1; + default: + throw std::runtime_error("Unknown backend"); + } +} + +inline DeviceType backendToDeviceType(Backend b) { + switch (b) { + case Backend::CPU: + case Backend::MkldnnCPU: + case Backend::SparseCPU: + case Backend::SparseCsrCPU: + case Backend::QuantizedCPU: + return DeviceType::CPU; + case Backend::CUDA: + case Backend::SparseCUDA: + case Backend::QuantizedCUDA: + case Backend::SparseCsrCUDA: + return DeviceType::CUDA; + case Backend::HIP: + return DeviceType::HIP; + case Backend::VE: + return DeviceType::VE; + case Backend::FPGA: + return DeviceType::FPGA; + case Backend::MAIA: + return DeviceType::MAIA; + case Backend::XLA: + return DeviceType::XLA; + case Backend::Lazy: + return DeviceType::Lazy; + case Backend::SparseHIP: + return DeviceType::HIP; + case Backend::SparseVE: + return DeviceType::VE; + case Backend::SparseCsrHIP: + return DeviceType::HIP; + case Backend::SparseCsrVE: + return DeviceType::VE; + case Backend::IPU: + return DeviceType::IPU; + case Backend::XPU: + case Backend::SparseXPU: + case Backend::SparseCsrXPU: + case Backend::QuantizedXPU: + return DeviceType::XPU; + case Backend::Vulkan: + return DeviceType::Vulkan; + case Backend::Metal: + return DeviceType::Metal; + case Backend::Meta: + return DeviceType::Meta; + case Backend::MPS: + return DeviceType::MPS; + case Backend::HPU: + return DeviceType::HPU; + case Backend::MTIA: + return DeviceType::MTIA; + case Backend::PrivateUse1: + case Backend::SparsePrivateUse1: + case Backend::SparseCsrPrivateUse1: + case Backend::QuantizedPrivateUse1: + return DeviceType::PrivateUse1; 
+ case Backend::Undefined: + TORCH_CHECK(false, "Undefined backend is not a valid device type"); + default: + TORCH_CHECK(false, "Unknown backend"); + } +} + +inline const char* toString(Backend b) { + switch (b) { + case Backend::CPU: + return "CPU"; + case Backend::CUDA: + return "CUDA"; + case Backend::HIP: + return "HIP"; + case Backend::VE: + return "VE"; + case Backend::FPGA: + return "FPGA"; + case Backend::XPU: + return "XPU"; + case Backend::IPU: + return "IPU"; + case Backend::MAIA: + return "MAIA"; + case Backend::XLA: + return "XLA"; + case Backend::Lazy: + return "Lazy"; + case Backend::MPS: + return "MPS"; + case Backend::SparseCPU: + return "SparseCPU"; + case Backend::SparseCUDA: + return "SparseCUDA"; + case Backend::SparseHIP: + return "SparseHIP"; + case Backend::SparseVE: + return "SparseVE"; + case Backend::SparseXPU: + return "SparseXPU"; + case Backend::SparsePrivateUse1: + return "SparsePrivateUse1"; + case Backend::SparseCsrCPU: + return "SparseCsrCPU"; + case Backend::SparseCsrCUDA: + return "SparseCsrCUDA"; + case Backend::SparseCsrHIP: + return "SparseCsrHIP"; + case Backend::SparseCsrVE: + return "SparseCsrVE"; + case Backend::SparseCsrXPU: + return "SparseCsrXPU"; + case Backend::SparseCsrPrivateUse1: + return "SparseCsrPrivateUse1"; + case Backend::MkldnnCPU: + return "MkldnnCPU"; + case Backend::Vulkan: + return "Vulkan"; + case Backend::Metal: + return "Metal"; + case Backend::Meta: + return "Meta"; + case Backend::QuantizedCPU: + return "QuantizedCPU"; + case Backend::QuantizedCUDA: + return "QuantizedCUDA"; + case Backend::QuantizedXPU: + return "QuantizedXPU"; + case Backend::QuantizedPrivateUse1: + return "QuantizedPrivateUse1"; + case Backend::HPU: + return "HPU"; + case Backend::MTIA: + return "MTIA"; + case Backend::PrivateUse1: + return "PrivateUseOne"; + default: + return "UNKNOWN_BACKEND"; + } +} + +inline bool isSparse(Backend b) { + switch (b) { + case Backend::SparseXPU: + case Backend::SparseCPU: + case Backend::SparseCUDA: + case Backend::SparseHIP: + case Backend::SparseVE: + case Backend::SparsePrivateUse1: + return true; + default: + return false; + } +} + +inline bool isSparseCsr(Backend b) { + switch (b) { + case Backend::SparseCsrXPU: + case Backend::SparseCsrCPU: + case Backend::SparseCsrCUDA: + case Backend::SparseCsrHIP: + case Backend::SparseCsrVE: + case Backend::SparseCsrPrivateUse1: + return true; + default: + return false; + } +} + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/CPUAllocator.h b/phivenv/Lib/site-packages/torch/include/c10/core/CPUAllocator.h new file mode 100644 index 0000000000000000000000000000000000000000..d84ac28ec9e99d989692e37a1c465689dc7edf9a --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/CPUAllocator.h @@ -0,0 +1,59 @@ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +// TODO: rename to c10 +C10_DECLARE_bool(caffe2_report_cpu_memory_usage); + +namespace c10 { + +using MemoryDeleter = void (*)(void*); + +// A helper function that is basically doing nothing. 
+C10_API void NoDelete(void*); + +// A simple struct that is used to report C10's memory allocation, +// deallocation status and out-of-memory events to the profiler +class C10_API ProfiledCPUMemoryReporter { + public: + ProfiledCPUMemoryReporter() = default; + void New(void* ptr, size_t nbytes); + void OutOfMemory(size_t nbytes); + void Delete(void* ptr); + + private: + std::mutex mutex_; + std::unordered_map size_table_; + size_t allocated_ = 0; + size_t log_cnt_ = 0; +}; + +C10_API ProfiledCPUMemoryReporter& profiledCPUMemoryReporter(); + +// Get the CPU Allocator. +C10_API at::Allocator* GetCPUAllocator(); +// Sets the CPU allocator to the given allocator: the caller gives away the +// ownership of the pointer. +C10_API void SetCPUAllocator(at::Allocator* alloc, uint8_t priority = 0); + +// Get the Default CPU Allocator +C10_API at::Allocator* GetDefaultCPUAllocator(); + +// Get the Default Mobile CPU Allocator +C10_API at::Allocator* GetDefaultMobileCPUAllocator(); + +// The CPUCachingAllocator is experimental and might disappear in the future. +// The only place that uses it is in StaticRuntime. +// Set the CPU Caching Allocator +C10_API void SetCPUCachingAllocator(Allocator* alloc, uint8_t priority = 0); +// Get the CPU Caching Allocator +C10_API Allocator* GetCPUCachingAllocator(); + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/CachingDeviceAllocator.h b/phivenv/Lib/site-packages/torch/include/c10/core/CachingDeviceAllocator.h new file mode 100644 index 0000000000000000000000000000000000000000..82e3bf1a40af2ffdec2f4b424ab741f277715363 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/CachingDeviceAllocator.h @@ -0,0 +1,61 @@ +#pragma once + +#include + +namespace c10::CachingDeviceAllocator { + +using namespace c10::CachingAllocator; + +// Struct containing memory allocator summary statistics for a device. +struct DeviceStats { + // COUNT: allocations requested by client code + StatArray allocation; + // COUNT: number of allocated segments from device memory allocation. + StatArray segment; + // COUNT: number of active memory blocks (allocated or used by stream) + StatArray active; + // COUNT: number of inactive, split memory blocks (unallocated but can't be + // released via device memory deallocation) + StatArray inactive_split; + + // SUM: bytes allocated by this memory allocator + StatArray allocated_bytes; + // SUM: bytes reserved by this memory allocator (both free and used) + StatArray reserved_bytes; + // SUM: bytes within active memory blocks + StatArray active_bytes; + // SUM: bytes within inactive, split memory blocks + StatArray inactive_split_bytes; + // SUM: bytes requested by client code + StatArray requested_bytes; + + // COUNT: total number of failed calls to device malloc necessitating cache + // flushes. + int64_t num_alloc_retries = 0; + + // COUNT: total number of OOMs (i.e. failed calls to device memory allocation + // after cache flush) + int64_t num_ooms = 0; + + // COUNT: total number of oversize blocks allocated from pool + Stat oversize_allocations; + + // COUNT: total number of oversize blocks requiring malloc + Stat oversize_segments; + + // COUNT: total number of synchronize_and_free_events() calls + int64_t num_sync_all_streams = 0; + + // COUNT: total number of device memory allocation calls. This includes both + // mapped and malloced memory. + int64_t num_device_alloc = 0; + + // COUNT: total number of device memory deallocation calls. This includes both + // un-mapped and free memory. 
+ int64_t num_device_free = 0; + + // SIZE: maximum block size that is allowed to be split. + int64_t max_split_size = 0; +}; + +} // namespace c10::CachingDeviceAllocator diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/CompileTimeFunctionPointer.h b/phivenv/Lib/site-packages/torch/include/c10/core/CompileTimeFunctionPointer.h new file mode 100644 index 0000000000000000000000000000000000000000..698f191056693249c0f0a95f1e671f7f6cf67d12 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/CompileTimeFunctionPointer.h @@ -0,0 +1,57 @@ +#pragma once + +#include +#include + +namespace c10 { + +/** + * Represent a function pointer as a C++ type. + * This allows using the function pointer as a type + * in a template and calling it from inside the template + * allows the compiler to inline the call because it + * knows the function pointer at compile time. + * + * Example 1: + * int add(int a, int b) {return a + b;} + * using Add = TORCH_FN_TYPE(add); + * template struct Executor { + * int execute(int a, int b) { + * return Func::func_ptr()(a, b); + * } + * }; + * Executor executor; + * EXPECT_EQ(3, executor.execute(1, 2)); + * + * Example 2: + * int add(int a, int b) {return a + b;} + * template int execute(Func, int a, int b) { + * return Func::func_ptr()(a, b); + * } + * EXPECT_EQ(3, execute(TORCH_FN(add), 1, 2)); + */ +template +struct CompileTimeFunctionPointer final { + static_assert( + guts::is_function_type::value, + "TORCH_FN can only wrap function types."); + using FuncType = FuncType_; + + static constexpr FuncType* func_ptr() { + return func_ptr_; + } +}; + +template +struct is_compile_time_function_pointer : std::false_type {}; +template +struct is_compile_time_function_pointer< + CompileTimeFunctionPointer> : std::true_type {}; + +} // namespace c10 + +#define TORCH_FN_TYPE(func) \ + ::c10::CompileTimeFunctionPointer< \ + std::remove_pointer_t>, \ + func> +#define TORCH_FN(func) TORCH_FN_TYPE(func)() diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/ConstantSymNodeImpl.h b/phivenv/Lib/site-packages/torch/include/c10/core/ConstantSymNodeImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..f65c70da714df8cd54eec5d0b4c1e821d14f7bc7 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/ConstantSymNodeImpl.h @@ -0,0 +1,110 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace c10 { + +// Unlike other SymNodeImpl, this cannot be "dispatched" conventionally, +// as it typically needs to defer to another SymNodeImpl +// +// Can either represent a bool, int (don't support float yet) this is useful +// for representing otherwise unrepresentable large negative integer constant. 
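+//
+// Illustrative sketch (not part of the original header): a constant boolean
+// node could be created roughly as
+//
+//   c10::SymNode n = c10::make_intrusive<ConstantSymNodeImpl<bool>>(true);
+//
+// and would then report is_bool() == true and constant_bool() == true.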
+template <typename T>
+class C10_API ConstantSymNodeImpl : public SymNodeImpl {
+  static_assert(
+      ::std::is_same_v<T, int64_t> || ::std::is_same_v<T, bool>,
+      "ConstantSymNodeImpl can only accept int64_t or bool types");
+
+ public:
+  ConstantSymNodeImpl(T val) : value_(val) {}
+
+  bool is_int() override {
+    return is_int_();
+  }
+  bool is_bool() override {
+    return is_bool_();
+  }
+  bool is_float() override {
+    return false;
+  }
+  int64_t guard_int(
+      const char* file [[maybe_unused]],
+      int64_t line [[maybe_unused]]) override {
+    TORCH_CHECK(is_int(), "not an int");
+    return int_();
+  }
+  bool guard_bool(
+      const char* file [[maybe_unused]],
+      int64_t line [[maybe_unused]]) override {
+    TORCH_CHECK(is_bool(), "not a bool");
+    return bool_();
+  }
+  double guard_float(
+      const char* file [[maybe_unused]],
+      int64_t line [[maybe_unused]]) override {
+    TORCH_CHECK(false, "not a float");
+  }
+  int64_t int_() override {
+    TORCH_CHECK(is_int(), "not an int");
+    return ::std::get<int64_t>(value_);
+  }
+  bool bool_() override {
+    TORCH_CHECK(is_bool(), "not a bool");
+    return ::std::get<bool>(value_);
+  }
+  bool has_hint() override {
+    return true;
+  }
+  c10::SymNode eq(const c10::SymNode& other) override;
+  c10::SymNode ne(const c10::SymNode& other) override;
+  c10::SymNode ge(const c10::SymNode& other) override;
+  c10::SymNode le(const c10::SymNode& other) override;
+  c10::SymNode lt(const c10::SymNode& other) override;
+  c10::SymNode gt(const c10::SymNode& other) override;
+  c10::SymNode mul(const c10::SymNode& other) override;
+  ::std::string str() override {
+    if constexpr (is_int_()) {
+      return ::std::to_string(::std::get<int64_t>(value_));
+    } else {
+      return ::std::get<bool>(value_) ? "true" : "false";
+    }
+  }
+  std::optional<int64_t> constant_int() override {
+    if constexpr (is_int_()) {
+      return ::std::get<int64_t>(value_);
+    } else {
+      return std::nullopt;
+    }
+  }
+  std::optional<bool> constant_bool() override {
+    if constexpr (is_bool_()) {
+      return ::std::get<bool>(value_);
+    } else {
+      return std::nullopt;
+    }
+  }
+  bool is_constant() override {
+    return true;
+  }
+  bool is_symbolic() override {
+    return false;
+  }
+
+ private:
+  ::std::variant<int64_t, bool> value_;
+
+  static constexpr bool is_int_() {
+    return ::std::is_same_v<T, int64_t>;
+  }
+  static constexpr bool is_bool_() {
+    return ::std::is_same_v<T, bool>;
+  }
+};
+
+} // namespace c10
diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/Contiguity.h b/phivenv/Lib/site-packages/torch/include/c10/core/Contiguity.h
new file mode 100644
index 0000000000000000000000000000000000000000..0e15a8dd788b6c793af697fdce3f905720afef65
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/c10/core/Contiguity.h
@@ -0,0 +1,154 @@
+#pragma once
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+namespace c10 {
+
+template <typename T>
+bool _compute_contiguous(ArrayRef<T> sizes, ArrayRef<T> strides, T numel) {
+  if (TORCH_GUARD_SIZE_OBLIVIOUS(sym_eq(numel, 0))) {
+    return true;
+  }
+
+  T expected_stride = 1;
+  // NB: make sure we do signed arithmetic
+  for (int64_t d = int64_t(sizes.size()) - 1; d >= 0; d--) {
+    const auto& size_d = sizes[d];
+    if (TORCH_GUARD_SIZE_OBLIVIOUS(sym_eq(size_d, 1))) {
+      continue;
+    }
+
+    if (TORCH_GUARD_SIZE_OBLIVIOUS(sym_ne(strides[d], expected_stride))) {
+      return false;
+    }
+    expected_stride *= size_d;
+  }
+  return true;
+}
+
+// This function will return True if the tensor is contiguous, and False if it
+// is not, or if we can't determine whether it is contiguous due to unbacked
+// symbols (it could be either in that case based on the actual runtime data).
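+//
+// For example (illustrative, not from the original header): sizes {2, 3} with
+// strides {3, 1} are contiguous, while strides {1, 2} are not; and if a stride
+// is an unbacked symbol, sym_ne(stride, expected_stride) cannot be refuted, so
+// this function conservatively returns false.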
+template +bool definitely_contiguous(ArrayRef sizes, ArrayRef strides, T numel) { + if (TORCH_GUARD_OR_FALSE(sym_eq(numel, 0))) { + return true; + } + + T expected_stride = 1; + // NB: make sure we do signed arithmetic + for (int64_t d = int64_t(sizes.size()) - 1; d >= 0; d--) { + const auto& size_d = sizes[d]; + if (TORCH_GUARD_OR_FALSE(sym_eq(size_d, 1))) { + continue; + } + + if (TORCH_GUARD_OR_TRUE(sym_ne(strides[d], expected_stride))) { + return false; + } + expected_stride *= size_d; + } + return true; +} + +template +bool _compute_channels_last_contiguous_2d( + ArrayRef sizes, + ArrayRef strides) { + // Please don't combine these code, constant array is used here to let + // compiler fully unroll the loop to get better performance + switch (sizes.size()) { + case 4: { + T expected = 1; + for (auto& d : {1, 3, 2, 0}) { + const auto& size_d = sizes[d]; + if (TORCH_GUARD_SIZE_OBLIVIOUS(sym_ne(size_d, 1))) { + if (TORCH_GUARD_SIZE_OBLIVIOUS(sym_ne(strides[d], expected))) { + return false; + } + expected *= size_d; + } + } + return true; + } + // NOLINTNEXTLINE(bugprone-branch-clone) + case 3: + // TODO dim == 3 case will be enabled once it is fully tested + return false; + default: + return false; + } +} + +template +bool _compute_channels_last_contiguous_3d( + ArrayRef sizes, + ArrayRef strides) { + // Please don't combine these code, constant array is used here to let + // compiler fully unroll the loop to get better performance + switch (sizes.size()) { + case 5: { + T expected = 1; + for (auto& d : {1, 4, 3, 2, 0}) { + const auto& size_d = sizes[d]; + if (TORCH_GUARD_SIZE_OBLIVIOUS(sym_ne(size_d, 1))) { + if (TORCH_GUARD_SIZE_OBLIVIOUS(sym_ne(strides[d], expected))) { + return false; + } + expected *= size_d; + } + } + return true; + } + // NOLINTNEXTLINE(bugprone-branch-clone) + case 4: + // TODO dim == 4 case will be enabled once it is fully tested + return false; + default: + return false; + } +} + +template +bool _compute_non_overlapping_and_dense( + ArrayRef sizes, + ArrayRef strides) { + auto dim = sizes.size(); + if (dim == 1) { + return sizes[0] < 2 || strides[0] == 1; + } + SmallVector perm; + perm.resize(dim); + for (const auto i : c10::irange(dim)) { + perm[i] = i; + } + // Sort by strides, leaving 0 and 1 sized dims at the end of the array + std::sort(perm.begin(), perm.end(), [&](int64_t a, int64_t b) { + if (sizes[a] < 2) { + return false; + } else if (sizes[b] < 2) { + return true; + } + return strides[a] < strides[b]; + }); + T require_stride = 1; + for (const auto i : c10::irange(dim)) { + const auto& size_perm_i = sizes[perm[i]]; + if (size_perm_i < 2) { + return true; + } + if (strides[perm[i]] != require_stride) { + return false; + } + require_stride *= size_perm_i; + } + return true; +} + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/CopyBytes.h b/phivenv/Lib/site-packages/torch/include/c10/core/CopyBytes.h new file mode 100644 index 0000000000000000000000000000000000000000..f5b08d74aa6ba3525d2ab50f73dd217b893b4f9d --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/CopyBytes.h @@ -0,0 +1,48 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace c10 { + +using CopyBytesFunction = void (*)( + size_t nbytes, + const void* src, + Device src_device, + void* dst, + Device dst_device); + +struct C10_API _CopyBytesFunctionRegisterer { + _CopyBytesFunctionRegisterer( + DeviceType from, + DeviceType to, + CopyBytesFunction func_sync, + CopyBytesFunction func_async = nullptr); +}; + +#define 
REGISTER_COPY_BYTES_FUNCTION(from, to, ...) \ + namespace { \ + static _CopyBytesFunctionRegisterer C10_ANONYMOUS_VARIABLE( \ + g_copy_function)(from, to, __VA_ARGS__); \ + } + +/* + * WARNING: Implementations for this function are currently registered from + * ATen and caffe2, not yet from c10. Don't use this if not either ATen + * or caffe2 is present as well. + * We can't move them yet, because the CUDA implementations aren't unified yet + * between ATen and caffe2. + * We're planning to move the implementations into c10/backend/xxx + * to make c10 self contained again. + */ +C10_API void CopyBytes( + size_t nbytes, + const void* src, + Device src_device, + void* dst, + Device dst_device, + bool async); +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/DefaultDtype.h b/phivenv/Lib/site-packages/torch/include/c10/core/DefaultDtype.h new file mode 100644 index 0000000000000000000000000000000000000000..5c4cd53fa78dc59fc7da6b0fbe4fd2ad9f91cf77 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/DefaultDtype.h @@ -0,0 +1,15 @@ +#pragma once + +#include +#include + +namespace caffe2 { +class TypeMeta; +} // namespace caffe2 + +namespace c10 { +C10_API void set_default_dtype(caffe2::TypeMeta dtype); +C10_API const caffe2::TypeMeta get_default_dtype(); +C10_API ScalarType get_default_dtype_as_scalartype(); +C10_API const caffe2::TypeMeta get_default_complex_dtype(); +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/DefaultTensorOptions.h b/phivenv/Lib/site-packages/torch/include/c10/core/DefaultTensorOptions.h new file mode 100644 index 0000000000000000000000000000000000000000..c00197ead055805164a6f99ff8600b46784f3963 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/DefaultTensorOptions.h @@ -0,0 +1,45 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace c10 { + +struct TensorOptions; + +/// Like TensorOptions, but all fields are guaranteed to be filled. +struct DefaultTensorOptions { + DefaultTensorOptions() = default; + + caffe2::TypeMeta dtype() const noexcept { + return dtype_; + } + Device device() const noexcept { + return device_; + } + Layout layout() const noexcept { + return layout_; + } + bool requires_grad() const noexcept { + return requires_grad_; + } + + // Defined in TensorOptions.h + inline DefaultTensorOptions& merge(const TensorOptions& options); + + private: + caffe2::TypeMeta dtype_ = caffe2::TypeMeta::Make(); // 64-bit + Device device_ = at::kCPU; // 32-bit + Layout layout_ = at::kStrided; // 8-bit + bool requires_grad_ = false; // 8-bit +}; + +inline const DefaultTensorOptions& getDefaultTensorOptions() { + static const auto options = DefaultTensorOptions(); + return options; +} + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/Device.h b/phivenv/Lib/site-packages/torch/include/c10/core/Device.h new file mode 100644 index 0000000000000000000000000000000000000000..98aee52700b6c63cfeb28be194e0e3524b7126d4 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/Device.h @@ -0,0 +1,216 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace c10 { + +/// An index representing a specific device; e.g., the 1 in GPU 1. +/// A DeviceIndex is not independently meaningful without knowing +/// the DeviceType it is associated; try to use Device rather than +/// DeviceIndex directly. 
+using DeviceIndex = int8_t; + +/// Represents a compute device on which a tensor is located. A device is +/// uniquely identified by a type, which specifies the type of machine it is +/// (e.g. CPU or CUDA GPU), and a device index or ordinal, which identifies the +/// specific compute device when there is more than one of a certain type. The +/// device index is optional, and in its defaulted state represents (abstractly) +/// "the current device". Further, there are two constraints on the value of the +/// device index, if one is explicitly stored: +/// 1. A negative index represents the current device, a non-negative index +/// represents a specific, concrete device, +/// 2. When the device type is CPU, the device index must be zero. +struct C10_API Device final { + using Type = DeviceType; + + /// Constructs a new `Device` from a `DeviceType` and an optional device + /// index. + /* implicit */ Device(DeviceType type, DeviceIndex index = -1) + : type_(type), index_(index) { + validate(); + } + + /// Constructs a `Device` from a string description, for convenience. + /// The string supplied must follow the following schema: + /// `(cpu|cuda)[:]` + /// where `cpu` or `cuda` specifies the device type, and + /// `:` optionally specifies a device index. + /* implicit */ Device(const std::string& device_string); + + /// Returns true if the type and index of this `Device` matches that of + /// `other`. + bool operator==(const Device& other) const noexcept { + return this->type_ == other.type_ && this->index_ == other.index_; + } + + /// Returns true if the type or index of this `Device` differs from that of + /// `other`. + bool operator!=(const Device& other) const noexcept { + return !(*this == other); + } + + /// Sets the device index. + void set_index(DeviceIndex index) { + index_ = index; + } + + /// Returns the type of device this is. + DeviceType type() const noexcept { + return type_; + } + + /// Returns the optional index. + DeviceIndex index() const noexcept { + return index_; + } + + /// Returns true if the device has a non-default index. + bool has_index() const noexcept { + return index_ != -1; + } + + /// Return true if the device is of CUDA type. + bool is_cuda() const noexcept { + return type_ == DeviceType::CUDA; + } + + /// Return true if the device is of PrivateUse1 type. + bool is_privateuseone() const noexcept { + return type_ == DeviceType::PrivateUse1; + } + + /// Return true if the device is of MPS type. + bool is_mps() const noexcept { + return type_ == DeviceType::MPS; + } + + /// Return true if the device is of HIP type. + bool is_hip() const noexcept { + return type_ == DeviceType::HIP; + } + + /// Return true if the device is of VE type. + bool is_ve() const noexcept { + return type_ == DeviceType::VE; + } + + /// Return true if the device is of XPU type. + bool is_xpu() const noexcept { + return type_ == DeviceType::XPU; + } + + /// Return true if the device is of IPU type. + bool is_ipu() const noexcept { + return type_ == DeviceType::IPU; + } + + /// Return true if the device is of XLA type. + bool is_xla() const noexcept { + return type_ == DeviceType::XLA; + } + + /// Return true if the device is of MTIA type. + bool is_mtia() const noexcept { + return type_ == DeviceType::MTIA; + } + + /// Return true if the device is of HPU type. + bool is_hpu() const noexcept { + return type_ == DeviceType::HPU; + } + + /// Return true if the device is of Lazy type. 
+ bool is_lazy() const noexcept { + return type_ == DeviceType::Lazy; + } + + /// Return true if the device is of Vulkan type. + bool is_vulkan() const noexcept { + return type_ == DeviceType::Vulkan; + } + + /// Return true if the device is of Metal type. + bool is_metal() const noexcept { + return type_ == DeviceType::Metal; + } + + /// Return true if the device is of MAIA type. + bool is_maia() const noexcept { + return type_ == DeviceType::MAIA; + } + + /// Return true if the device is of META type. + bool is_meta() const noexcept { + return type_ == DeviceType::Meta; + } + + /// Return true if the device is of CPU type. + bool is_cpu() const noexcept { + return type_ == DeviceType::CPU; + } + + /// Return true if the device supports arbitrary strides. + bool supports_as_strided() const noexcept { + return type_ != DeviceType::IPU && type_ != DeviceType::XLA && + type_ != DeviceType::Lazy && type_ != DeviceType::MTIA; + } + + /// Same string as returned from operator<<. + std::string str() const; + + private: + DeviceType type_; + DeviceIndex index_ = -1; + void validate() { + // Removing these checks in release builds noticeably improves + // performance in micro-benchmarks. + // This is safe to do, because backends that use the DeviceIndex + // have a later check when we actually try to switch to that device. + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + index_ >= -1, + "Device index must be -1 or non-negative, got ", + static_cast(index_)); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + !is_cpu() || index_ <= 0, + "CPU device index must be -1 or zero, got ", + static_cast(index_)); + } +}; + +C10_API std::ostream& operator<<(std::ostream& stream, const Device& device); + +} // namespace c10 + +namespace std { +template <> +struct hash { + size_t operator()(c10::Device d) const noexcept { + // Are you here because this static assert failed? Make sure you ensure + // that the bitmasking code below is updated accordingly! + static_assert(sizeof(c10::DeviceType) == 1, "DeviceType is not 8-bit"); + static_assert(sizeof(c10::DeviceIndex) == 1, "DeviceIndex is not 8-bit"); + // Note [Hazard when concatenating signed integers] + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // We must first convert to a same-sized unsigned type, before promoting to + // the result type, to prevent sign extension when any of the values is -1. + // If sign extension occurs, you'll clobber all of the values in the MSB + // half of the resulting integer. + // + // Technically, by C/C++ integer promotion rules, we only need one of the + // uint32_t casts to the result type, but we put in both for explicitness's + // sake. 
+ uint32_t bits = static_cast(static_cast(d.type())) + << 16 | + static_cast(static_cast(d.index())); + return std::hash{}(bits); + } +}; +} // namespace std diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/DeviceArray.h b/phivenv/Lib/site-packages/torch/include/c10/core/DeviceArray.h new file mode 100644 index 0000000000000000000000000000000000000000..df339e672cdd668b7a5a2bffa6ff8b9b538f8876 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/DeviceArray.h @@ -0,0 +1,28 @@ +#include +#include +#include +#include +#include + +namespace c10 { + +template +class DeviceArray { + public: + DeviceArray(c10::Allocator& allocator, size_t size) + : data_ptr_(allocator.allocate(size * sizeof(T))) { + static_assert(std::is_trivial_v, "T must be a trivial type"); + TORCH_INTERNAL_ASSERT( + 0 == (reinterpret_cast(data_ptr_.get()) % alignof(T)), + "c10::DeviceArray: Allocated memory is not aligned for this data type"); + } + + T* get() { + return static_cast(data_ptr_.get()); + } + + private: + c10::DataPtr data_ptr_; +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/DeviceGuard.h b/phivenv/Lib/site-packages/torch/include/c10/core/DeviceGuard.h new file mode 100644 index 0000000000000000000000000000000000000000..3f8ce0fab9f7a8f43f2bd7ec698531934d6702ff --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/DeviceGuard.h @@ -0,0 +1,202 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace c10 { + +/// RAII guard that sets a certain default device in its constructor, and +/// changes it back to the device that was originally active upon destruction. +/// +/// The device is always reset to the one that was active at the time of +/// construction of the guard. Even if you `set_device` after construction, the +/// destructor will still reset the device to the one that was active at +/// construction time. +/// +/// This device guard does NOT have an uninitialized state; it is guaranteed +/// to reset a device on exit. If you are in a situation where you *might* +/// want to setup a guard (i.e., are looking for the moral equivalent +/// of std::optional), see OptionalDeviceGuard. +class DeviceGuard { + public: + /// No default constructor; see Note [Omitted default constructor from RAII] + explicit DeviceGuard() = delete; + + /// Set the current device to the passed Device. + explicit DeviceGuard(Device device) : guard_(device) {} + + /// This constructor is for testing only. + explicit DeviceGuard( + Device device, + const impl::DeviceGuardImplInterface* impl) + : guard_(device, impl) {} + + ~DeviceGuard() = default; + + /// Copy is disallowed + DeviceGuard(const DeviceGuard&) = delete; + DeviceGuard& operator=(const DeviceGuard&) = delete; + + /// Move is disallowed, as DeviceGuard does not have an uninitialized state, + /// which is required for moves on types with nontrivial destructors. + DeviceGuard(DeviceGuard&& other) = delete; + DeviceGuard& operator=(DeviceGuard&& other) = delete; + + /// Sets the device to the given one. The specified device must be consistent + /// with the device type originally specified during guard construction. + /// + /// TODO: The consistency check here is inconsistent with StreamGuard's + /// behavior with set_stream, where a stream on a different device than + /// the original one isn't an error; we just reset the stream and then + /// switch devices. 
+ void reset_device(at::Device device) { + guard_.reset_device(device); + } + + /// This method is for testing only. + void reset_device( + at::Device device, + const impl::DeviceGuardImplInterface* impl) { + guard_.reset_device(device, impl); + } + + /// Sets the device index to the given one. The device type is inferred + /// from the original device type the guard was constructed with. + void set_index(DeviceIndex index) { + guard_.set_index(index); + } + + /// Returns the device that was set at the time the guard was constructed. + Device original_device() const { + return guard_.original_device(); + } + + /// Returns the most recent device that was set using this device guard, + /// either from construction, or via set_device. + Device current_device() const { + return guard_.current_device(); + } + + private: + impl::InlineDeviceGuard guard_; +}; + +/** + * A OptionalDeviceGuard is an RAII class that sets a device to some value on + * initialization, and resets the device to its original value on destruction. + * Morally, a OptionalDeviceGuard is equivalent to std::optional, + * but with extra constructors and methods as appropriate. + * + * Besides its obvious use (optionally applying a DeviceGuard), + * OptionalDeviceGuard is often also used for the following idiom: + * + * OptionalDeviceGuard g; + * for (const auto& t : tensors) { + * g.set_device(t.device()); + * do_something_with(t); + * } + * + * This usage is marginally more efficient than constructing a DeviceGuard every + * iteration of the for loop, as it avoids an unnecessary device reset. + * + * Unlike DeviceGuard, a OptionalDeviceGuard may be uninitialized. This occurs + * when you use the nullary constructor, or pass a nullopt to the constructor. + * Uninitialized OptionalDeviceGuards do *nothing*; they do not know what the + * original device was and they do not reset on destruction. This is why + * original_device() and current_device() return std::optional rather + * than Device (as they do in DeviceGuard), and also is why we didn't just + * provide OptionalDeviceGuard by default and hide DeviceGuard from users. + * + * The semantics of an OptionalDeviceGuard are exactly explained by thinking + * of it as an std::optional. In particular, an initialized + * OptionalDeviceGuard doesn't restore device to its value at construction; it + * restores device to its value *at initialization*. So if you have the + * program: + * + * setDevice(1); + * OptionalDeviceGuard g; + * setDevice(2); + * g.reset_device(Device(DeviceType::CUDA, 3)); // initializes! + * + * On destruction, g will reset device to 2, rather than 1. + * + * An uninitialized OptionalDeviceGuard is distinct from a (initialized) + * DeviceGuard whose original_device_ and current_device_ match, since the + * DeviceGuard will still reset the device to original_device_. + */ +class OptionalDeviceGuard { + public: + /// Create an uninitialized guard. Set the guard later using reset_device. + explicit OptionalDeviceGuard() = default; + + /// Initialize the guard, setting the current device to the passed Device. + explicit OptionalDeviceGuard(Device device) : guard_(device) {} + + /// Initialize the guard if a Device is passed; otherwise leave the + /// guard uninitialized. + explicit OptionalDeviceGuard(std::optional device) : guard_(device) {} + + /// Constructor for testing only. 
+ explicit OptionalDeviceGuard( + Device device, + const impl::DeviceGuardImplInterface* impl) + : guard_(device, impl) {} + + ~OptionalDeviceGuard() = default; + /// Copy is disallowed + OptionalDeviceGuard(const OptionalDeviceGuard&) = delete; + OptionalDeviceGuard& operator=(const OptionalDeviceGuard&) = delete; + + /// Move is disallowed + /// See Note [Explicit initialization of optional fields] + /// and // Note [Move construction for RAII guards is tricky] + /// for rationale. + OptionalDeviceGuard(OptionalDeviceGuard&& other) = delete; + OptionalDeviceGuard& operator=(OptionalDeviceGuard&& other) = delete; + + /// Sets the device to the given one. The specified device must be consistent + /// with the device type originally specified during guard construction. + void reset_device(at::Device device) { + guard_.reset_device(device); + } + + /// For testing only + void reset_device( + at::Device device, + const impl::DeviceGuardImplInterface* impl) { + guard_.reset_device(device, impl); + } + + /// Returns the device that was set at the time the guard was constructed. + std::optional original_device() const { + return guard_.original_device(); + } + + /// Returns the most recent device that was set using this device guard, + /// either from construction, or via reset_device. + std::optional current_device() const { + return guard_.current_device(); + } + + private: + impl::InlineOptionalDeviceGuard guard_{}; +}; + +// Note [Whither the DeviceGuard boilerplate] +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +// Design note: in principle, we could avoid these wrappers using: +// +// using DeviceGuard = impl::InlineDeviceGuard; +// using OptionalDeviceGuard = +// impl::InlineOptionalDeviceGuard; +// +// But the error messages are worse, and our users can't just look at the +// header file to find out what's going on. Furthermore, for specializations +// like CUDAStreamGuard, it can be profitable to replace some interfaces with +// refined types (e.g., return CUDAStream instead of Stream). So, we eat +// the boilerplate and write out the API explicitly. + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/DeviceType.h b/phivenv/Lib/site-packages/torch/include/c10/core/DeviceType.h new file mode 100644 index 0000000000000000000000000000000000000000..b786a3926ff290eb724bed470e12134d039f273c --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/DeviceType.h @@ -0,0 +1,123 @@ +#pragma once + +// This is directly synchronized with caffe2/proto/caffe2.proto, but +// doesn't require me to figure out how to get Protobuf headers into +// ATen/core (which would require a lot more build system hacking.) +// If you modify me, keep me synchronized with that file. + +#include + +#include +#include +#include +#include +#include + +namespace c10 { + +// These contains all device types that also have a BackendComponent +// and therefore participate in per-backend functionality dispatch keys. +// This is most backends except PrivateUse2 and PrivateUse3 +#define C10_FORALL_BACKEND_DEVICE_TYPES(_, extra) \ + _(CPU, extra) \ + _(CUDA, extra) \ + _(HIP, extra) \ + _(XLA, extra) \ + _(MPS, extra) \ + _(IPU, extra) \ + _(XPU, extra) \ + _(HPU, extra) \ + _(VE, extra) \ + _(Lazy, extra) \ + _(Meta, extra) \ + _(MTIA, extra) \ + _(PrivateUse1, extra) + +enum class DeviceType : int8_t { + CPU = 0, + CUDA = 1, // CUDA. + MKLDNN = 2, // Reserved for explicit MKLDNN + OPENGL = 3, // OpenGL + OPENCL = 4, // OpenCL + IDEEP = 5, // IDEEP. 
+ HIP = 6, // AMD HIP + FPGA = 7, // FPGA + MAIA = 8, // ONNX Runtime / Microsoft + XLA = 9, // XLA / TPU + Vulkan = 10, // Vulkan + Metal = 11, // Metal + XPU = 12, // XPU + MPS = 13, // MPS + Meta = 14, // Meta (tensors with no data) + HPU = 15, // HPU / HABANA + VE = 16, // SX-Aurora / NEC + Lazy = 17, // Lazy Tensors + IPU = 18, // Graphcore IPU + MTIA = 19, // Meta training and inference devices + PrivateUse1 = 20, // PrivateUse1 device + // NB: If you add more devices: + // - Change the implementations of DeviceTypeName and isValidDeviceType + // in DeviceType.cpp + // - Change the number below + COMPILE_TIME_MAX_DEVICE_TYPES = 21, +}; + +constexpr DeviceType kCPU = DeviceType::CPU; +constexpr DeviceType kCUDA = DeviceType::CUDA; +constexpr DeviceType kHIP = DeviceType::HIP; +constexpr DeviceType kFPGA = DeviceType::FPGA; +constexpr DeviceType kMAIA = DeviceType::MAIA; +constexpr DeviceType kXLA = DeviceType::XLA; +constexpr DeviceType kMPS = DeviceType::MPS; +constexpr DeviceType kMeta = DeviceType::Meta; +constexpr DeviceType kVulkan = DeviceType::Vulkan; +constexpr DeviceType kMetal = DeviceType::Metal; +constexpr DeviceType kXPU = DeviceType::XPU; +constexpr DeviceType kHPU = DeviceType::HPU; +constexpr DeviceType kVE = DeviceType::VE; +constexpr DeviceType kLazy = DeviceType::Lazy; +constexpr DeviceType kIPU = DeviceType::IPU; +constexpr DeviceType kMTIA = DeviceType::MTIA; +constexpr DeviceType kPrivateUse1 = DeviceType::PrivateUse1; + +// define explicit int constant +constexpr int COMPILE_TIME_MAX_DEVICE_TYPES = + static_cast(DeviceType::COMPILE_TIME_MAX_DEVICE_TYPES); + +static_assert( + COMPILE_TIME_MAX_DEVICE_TYPES <= 21, + "Hey! You seem to be adding a lot of new DeviceTypes. The intent was " + "for this constant to reflect the actual number of DeviceTypes we support " + "in PyTorch; it's important that this number is not too large as we " + "use this to allocate stack arrays in some places in our code. If you " + "are indeed just adding the 20th device type, feel free to change " + "the check to 32; but if you are adding some sort of extensible device " + "types registration, please be aware that you are affecting code that " + "this number is small. Try auditing uses of this constant."); + +C10_API std::string DeviceTypeName(DeviceType d, bool lower_case = false); + +C10_API bool isValidDeviceType(DeviceType d); + +C10_API std::ostream& operator<<(std::ostream& stream, DeviceType type); + +C10_API void register_privateuse1_backend(const std::string& backend_name); +C10_API std::string get_privateuse1_backend(bool lower_case = true); + +C10_API bool is_privateuse1_backend_registered(); + +} // namespace c10 + +namespace std { +template <> +struct hash { + std::size_t operator()(c10::DeviceType k) const { + return std::hash()(static_cast(k)); + } +}; +} // namespace std + +namespace torch { +// NOLINTNEXTLINE(misc-unused-using-decls) +using c10::DeviceType; +} // namespace torch diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/DispatchKey.h b/phivenv/Lib/site-packages/torch/include/c10/core/DispatchKey.h new file mode 100644 index 0000000000000000000000000000000000000000..06f9680312a03de5a62bfabc8d301ca55c452a2d --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/DispatchKey.h @@ -0,0 +1,743 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace c10 { + +// Semantically, each value of BackendComponent identifies a "backend" for our +// dispatch. 
Some functionalities that we may dispatch to are allowed to +// register different handlers for each backend. The BackendComponent is then +// used to figure out which backend implementation to dispatch to. + +// In implementation terms, the backend component identifies a specific "bit" in +// a DispatchKeySet. The bits in the DispatchKeySet are split between the bottom +// ~12 "BackendComponent" bits, while the remaining upper bits are assigned to +// functionalities. When we encounter a functionality bit that is known to be +// customizable per-backend, then we also look at the lower BackendComponent +// bits and take the highest bit to determine which backend's implementation to +// use. + +// WARNING! If you add a new backend component to the end of this list, +// make sure you register it before Meta. +// Meta must be at the end so that meta key in tls triggers meta kernels. +// (But you shouldn't: private use keys should have higher precedence than all +// built-in keys) + +// If you add a new (non-privateuse) backend here, +// make sure to add an Autograd fallthrough kernel +// in aten/src/ATen/core/VariableFallbackKernel.cpp + +#define C10_FORALL_BACKEND_COMPONENTS(_, extra) \ + _(CPU, extra) \ + _(CUDA, extra) \ + _(HIP, extra) \ + _(XLA, extra) \ + _(MPS, extra) \ + _(IPU, extra) \ + _(XPU, extra) \ + _(HPU, extra) \ + _(VE, extra) \ + _(Lazy, extra) \ + _(MTIA, extra) \ + _(MAIA, extra) \ + _(PrivateUse1, extra) \ + _(PrivateUse2, extra) \ + _(PrivateUse3, extra) \ + _(Meta, extra) + +// WARNING! If we add a new per-backend functionality key that has higher +// priority than Autograd, then make sure you update EndOfRuntimeBackendKeys + +#define C10_FORALL_FUNCTIONALITY_KEYS(_) \ + _(Dense, ) \ + _(Quantized, Quantized) \ + _(Sparse, Sparse) \ + _(SparseCsr, SparseCsr) \ + _(NestedTensor, NestedTensor) \ + _(AutogradFunctionality, Autograd) + +enum class BackendComponent : uint8_t { + + // A "backend" is colloquially used to refer to handlers for dispatch + // which actually implement the numerics of an operation in question. + // + // Due to the nature of the enum, these backends are specified in + // an ordered way, but for most backends this order is not semantically + // meaningful (e.g., it's valid to reorder these backends without changing + // semantics). The only situation when backend ordering is meaningful + // is when the backend participates in multiple dispatch with another + // backend; e.g., CPU and CUDA (cuda must have higher priority). + + // These keys don't correspond to individual kernels. + // Instead, they represent the backends that are allowed to override specific + // pieces of functionality: + // - dense kernels (e.g. DispatchKey::CPU) + // - sparse kernels (e.g. DispatchKey::SparseCPU) + // - quantized kernels (e.g. DispatchKey::QuantizedCPU) + // - autograd kernels (e.g. DispatchKey::AutogradCPU) + // We reserve space in the runtime operator table for this full cross product + // of + // [backends in this enum] x [keys below that are explicitly marked as having + // per-backend functionality] + // + // A meta tensor is a tensor without any data associated with it. (They + // have also colloquially been referred to as tensors on the "null" device). + // A meta tensor can be used to dry run operators without actually doing any + // computation, e.g., add on two meta tensors would give you another meta + // tensor with the output shape and dtype, but wouldn't actually add anything. 
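+  // For example (an illustrative note, not part of the original comment):
+  // adding two meta tensors of shape [2, 3] yields a meta tensor of shape
+  // [2, 3] with the promoted dtype, without touching any real memory.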
+
+  InvalidBit = 0,
+#define DEFINE_BACKEND_COMPONENT(n, _) n##Bit,
+  C10_FORALL_BACKEND_COMPONENTS(DEFINE_BACKEND_COMPONENT, unused)
+#undef DEFINE_BACKEND_COMPONENT
+
+  // Define an alias to represent end of backend dispatch keys.
+  // If you add new backend keys after PrivateUse3, please also update it here.
+  EndOfBackendKeys = MetaBit,
+};
+
+// Semantically, a dispatch key identifies a possible "level" in our
+// dispatch, for which a handler may be registered. Each handler corresponds
+// to a type of functionality.
+//
+// In implementation terms, the dispatch key identifies a specific "bit" in a
+// DispatchKeySet. Higher bit indexes get handled by dispatching first (because
+// we "count leading zeros" when we extract the highest priority dispatch
+// key.)
+//
+// Note [DispatchKey Classification]
+// This enum actually contains several types of keys, which are explained
+// in more detail further down:
+// (1) non-customizable backends (e.g. FPGA)
+// (2) non-customizable functionalities (e.g. Functionalize)
+// (3) functionalities that are customizable per backend (e.g. Dense, Sparse,
+//     AutogradFunctionality)
+// (4) per-backend instances of customizable functionalities (e.g. CPU,
+//     SparseCPU, AutogradCPU)
+// (5) alias keys (e.g. CompositeImplicitAutograd)
+//
+// Of the categories above, it's important to note:
+// (a) which keys are assigned individual bits in a DispatchKeySet
+// (b) which keys are assigned individual slots in the runtime operator table
+//     ("Runtime keys")
+//
+// (1), (2) and (3) all get their own dedicated bits in the DispatchKeySet.
+// (1), (2) and (4) all get their own dedicated slots in the runtime operator
+// table.
+
+// See Note [DispatchKeySet Internal Representation] for more details.
+//
+// NOTE: Keep the list in sync with `DispatchKey` in torchgen/model.py
+enum class DispatchKey : uint16_t {
+
+  // ~~~~~~~~~~~~~~~~~~~~~~~~~~ UNDEFINED ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
+  // This is not a "real" functionality, but it exists to give us a "nullopt"
+  // element we can return for cases when a DispatchKeySet contains no
+  // elements. You can think of a more semantically accurate definition of
+  // DispatchKey as:
+  //
+  //    using DispatchKey = std::optional<RealDispatchKey>
+  //
+  // and Undefined == nullopt. We didn't actually represent
+  // it this way because std::optional<RealDispatchKey> would take two
+  // words, when DispatchKey fits in eight bits.
+
+  Undefined = 0,
+
+  // Define an alias for Undefined to represent CatchAll (long term
+  // this will get eliminated, but for now it's convenient)
+  CatchAll = Undefined,
+
+  // ~~~~~~~~~~~~~~~~~~~~~~~~~~ Functionality Keys ~~~~~~~~~~~~~~~~~~~~~~ //
+  // Every value in the enum (up to EndOfFunctionalityKeys)
+  // corresponds to an individual "functionality" that can be dispatched to.
+  // This is represented in the DispatchKeySet by assigning each of these enum
+  // values to each of the remaining (64 - len(BackendComponent)) bits.
+  //
+  // Most of these functionalities have a single handler assigned to them,
+  // making them "runtime keys" that map to a single slot in the runtime
+  // operator table.
+  //
+  // A few functionalities are allowed to be customizable per backend.
+  // See [Note: Per-Backend Functionality Dispatch Keys] for details.
+
+  // See [Note: Per-Backend Functionality Dispatch Keys]
+  Dense,
+
+  // Below are non-extensible backends.
+ // These are backends that currently don't have their own overrides for + // Autograd/Sparse/Quantized kernels, + // and we therefore don't waste space in the runtime operator table allocating + // space for them. + // If any of these backends ever need to customize, e.g., Autograd, then we'll + // need to add a DispatchKey::*Bit for them. + + // TODO: put this in BackendComponents + FPGA, // Xilinx support lives out of tree at + // https://gitlab.com/pytorch-complex/vitis_kernels + + Vulkan, // TODO: put this in BackendComponents + Metal, // TODO: put this in BackendComponents + + // See [Note: Per-Backend Functionality Dispatch Keys] + Quantized, + + // This backend is to support custom RNGs; it lets you go + // to a different kernel if you pass in a generator that is not a + // traditional CPUGeneratorImpl/CUDAGeneratorImpl. To make use of this + // key: + // 1) set it as a second parameter of at::Generator constructor call in + // the user-defined PRNG class. + // 2) use it as a dispatch key while registering custom kernels + // (templatized kernels specialized for user-defined PRNG class) + // intended for out of tree use; tested by aten/src/ATen/test/rng_test.cpp + CustomRNGKeyId, + + // TODO: Make Mkldnn a functionality key, so we can give it Meta + // support + // Here are backends which specify more specialized operators + // based on the layout of the tensor. Note that the sparse backends + // are one case where ordering matters: sparse multi-dispatches with + // the corresponding dense tensors, and must be handled before them. + MkldnnCPU, // registered at build/aten/src/ATen/RegisterMkldnnCPU.cpp + // NB: not to be confused with MKLDNN, which is Caffe2 only + + // See [Note: Per-Backend Functionality Dispatch Keys] + Sparse, + + SparseCsr, + + NestedTensor, + + // In some situations, it is not immediately obvious what the correct + // backend for function is, because the function in question doesn't + // have any "tensor" arguments. In this case, a BackendSelect function + // can be registered to implement the custom determination of the + // correct backend. + BackendSelect, + + Python, + + // Out-of-core key for Fake Tensor in torchdistx. + // See https://pytorch.org/torchdistx/latest/fake_tensor.html + // TODO: delete this in favor of Python-implemented fake tensor + Fake, + // See Note [Out-of-tree vmap+grad prototype]. The purpose of this key + // is to insert code after the "autograd subsystem" runs, so this key should + // be directly after ADInplaceOrView and all of the autograd keys. + FuncTorchDynamicLayerBackMode, + + // Alias and mutation removal. + // If some backends want to opt into only alias removal or only mutation + // removal, + // we can consider adding separate keys dedicated to those individual passes. + // See Note [Functionalization Pass In Core] for details. + Functionalize, + + // The named dispatch key is set for any tensors with named dimensions. + // Although we have a dispatch key for named tensors, for historical reasons, + // this dispatch key doesn't do any of the substantive functionality for named + // tensor (though, hypothetically, it could!) At the moment, it's just + // responsible for letting us give good error messages when operations + // don't support named tensors. 
+ // + // NB: If you ever consider moving named tensor functionality into + // this dispatch key, note that it might be necessary add another dispatch + // key that triggers before composite operators, in case a composite operator + // has named dimension propagation that doesn't match that of its + // constituent parts. + // TODO: delete this once torchdim lands in functorch + Named, + + // The Conjugate dispatch key is set for any tensors that need to perform + // conjugation + // This is implemented at a dispatch level right before any backends run + Conjugate, + + // The Negative dispatch key is set for any tensors that need to perform + // negation + // This is implemented at a dispatch level right before any backends run + Negative, + + ZeroTensor, // registered at build/aten/src/ATen/RegisterZeroTensor.cpp + + // Note [ADInplaceOrView key] + // ADInplaceOrView key is used by inplace or view ops to register a kernel + // that does additional setup for future autograd computation. + // + // 1. For inplace ops this kernel does version bump + // 2. For view ops this kernel does `as_view` setup where we properly setup + // DifferentiableViewMeta on the view tensors. + // + // For other ops it's fallthrough kernel since there's no extra + // work to do. + // + // Note [Dream: skip VariableType kernel when requires_grad=false] + // + // In an ideal world where we can skip VariableType kernel for inputs + // with requires_grad=false, instead of a fallthrough kernel, we'll + // register a kernel shown below to all functional ops as well: + // torch::Tensor my_functional_op(...) { + // { + // // Note for every op in VariableType, you need to go through + // // `AutoDispatchBelowADInplaceOrView` guard exactly once to add the + // // key to TLS excluded set. If you don't go through it at all, + // // inplace/view ops called through `at::` inside your backend + // // kernel will dispatch to ADInplaceOrView kernels and do a lot + // // of extra work. + // at::AutoDispatchBelowADInplaceOrView guard; + // at::redispatch::my_functional_op(...); + // } + // } + // But this work is currently blocked since it adds an extra dispatch + // for all ops and it's non-trivial overhead at model level(a few percents). + // Thus our current approach takes advantage of the fact every kernel go + // through VariableType kernel first and pulls the + // `at::AutoDispatchBelowADInplaceOrView` guard of functional ops + // up to the `VariableType` kernel. Thus we only add the extra dispatch + // to view/inplace ops to minimize its perf impact to real models. + ADInplaceOrView, + // Note [Alias Dispatch Key : Autograd] + // All backends are oblivious to autograd; autograd is handled as a + // layer which happens on top of all backends. It inspects the autograd + // metadata of all inputs, determines what autograd metadata should be + // constructed by the output, and otherwise defers to the backend to + // actually do the numeric computation. Autograd contains + // the bulk of this logic. + + // Autograd is now an alias dispatch key which by default maps to all + // backend-specific autograd keys. + // Backend-specific allow backends to override the default kernel registered + // to Autograd key as needed. + // For example, XLA wants to define autograd for einsum directly. + // Registering a custom autograd implementation at the XLA key won't work + // because we process Autograd before XLA. This key has higher priority and + // gets processed first. 
You generally should NOT redispatch after handling + // autograd here (since that would result in execution of the Autograd + // operator, which you're trying to skip). In AutogradXLA implementations, + // you are responsible for handling autograd yourself, or deferring to other + // operators which support autograd. + + // Currently we only have backend-specific autograd keys for CPU/CUDA/XLA and + // reserved user-defined backends. All other in-tree backends share the + // AutogradOther key. We can add specific autograd key for those backends + // upon request. + AutogradOther, + + // See [Note: Per-Backend Functionality Dispatch Keys] + AutogradFunctionality, + + // NestedTensor is an example of something that isn't a "real backend" + // (because it mostly consists of redispatching kernels) + // but it would like to override autograd functionality in C++. + // We can handle cases like this by adding an extra functionality key + // exclusively for handling autograd for NestedTensor. + // lives out of tree at + // https://github.com/pytorch/nestedtensor + AutogradNestedTensor, + + Tracer, + + // TODO: make Autocast a functionality key + // Autocasting precedes VariableTypeId, to ensure casts are autograd-exposed + // and inputs are saved for backward in the post-autocast type. + AutocastCPU, + AutocastMTIA, + AutocastMAIA, + AutocastXPU, + AutocastIPU, + AutocastHPU, + AutocastXLA, + // AutocastXLA is only being used for TPUs. XLA GPUs continue to use + // AutocastCUDA. + AutocastMPS, + AutocastCUDA, + AutocastPrivateUse1, + + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~ WRAPPERS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // + // There are a number of alternative modes which may want to handle before + // autograd; for example, error checking, tracing, profiling or vmap. They + // go here. + + FuncTorchBatched, // See Note [Out-of-tree vmap+grad prototype] + + // Dispatch key for BatchedTensorImpl wrapping a nested tensor. + BatchedNestedTensor, + + FuncTorchVmapMode, // See Note [Out-of-tree vmap+grad prototype] + + // This is the dispatch key for BatchedTensorImpl, which is used to implement + // batching rules for vmap. + Batched, + + // When we are inside a vmap, all tensors dispatch on this key. + // See Note: [DispatchKey::VmapMode usage] for more details. + VmapMode, + + FuncTorchGradWrapper, // See Note [Out-of-tree vmap+grad prototype] + + // Out-of-core key for Deferred Module Initialization in torchdistx. + // See https://pytorch.org/torchdistx/latest/deferred_init.html + DeferredInit, + + // Used by Python key logic to know the set of tls on entry to the dispatcher + // This kernel assumes it is the top-most non-functorch-related DispatchKey. + // If you add a key above, make sure to update the fallback implementation for + // this. + PythonTLSSnapshot, + + // This key should be at the very top of the dispatcher + FuncTorchDynamicLayerFrontMode, // See Note [Out-of-tree vmap+grad prototype] + + // TESTING: This is intended to be a generic testing tensor type id. + // Don't use it for anything real; its only acceptable use is within a single + // process test. Use it by creating a TensorImpl with this DispatchKey, and + // then registering operators to operate on this type id. See + // aten/src/ATen/core/dispatch/backend_fallback_test.cpp for a usage example. + TESTING_ONLY_GenericWrapper, + + // TESTING: This is intended to be a generic testing tensor type id. + // Don't use it for anything real; its only acceptable use is within a ingle + // process test. 
Use it by toggling the mode on and off via
+  // TESTING_ONLY_tls_generic_mode_set_enabled and then registering operators
+  // to operate on this type id. See
+  // aten/src/ATen/core/dispatch/backend_fallback_test.cpp
+  // for a usage example.
+  TESTING_ONLY_GenericMode,
+
+  // This key is used for pre-dispatch tracing in make_fx.
+  // It has lower priority than the PythonDispatcher key
+  // because we use the PythonDispatcher to intercept the key from python,
+  // and avoid having to implement it in C++.
+  PreDispatch,
+
+  // This is a bypass that allows you to skip running the C++ dispatcher
+  // entirely
+  PythonDispatcher,
+
+  // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FIN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
+  EndOfFunctionalityKeys, // End of functionality keys.
+
+// ~~~~~~~~~~~~~~ "Dense" Per-Backend Dispatch keys ~~~~~~~~~~~~~~~~~~~~ //
+// Here are backends which you think of as traditionally specifying
+// how to implement operations on some device.
+
+#define DEFINE_PER_BACKEND_KEYS_FOR_BACKEND(n, prefix) prefix##n,
+
+#define DEFINE_PER_BACKEND_KEYS(fullname, prefix)      \
+  StartOf##fullname##Backends,                         \
+      C10_FORALL_BACKEND_COMPONENTS(                   \
+          DEFINE_PER_BACKEND_KEYS_FOR_BACKEND, prefix) \
+          EndOf##fullname##Backends = prefix##Meta,
+
+  C10_FORALL_FUNCTIONALITY_KEYS(DEFINE_PER_BACKEND_KEYS)
+
+#undef DEFINE_PER_BACKEND_KEYS
+#undef DEFINE_PER_BACKEND_KEYS_FOR_BACKEND
+
+  EndOfRuntimeBackendKeys = EndOfAutogradFunctionalityBackends,
+
+  // ~~~~~~~~~~~~~~~~~~~~~~ Alias Dispatch Keys ~~~~~~~~~~~~~~~~~~~~~~~~~~ //
+  // Note [Alias Dispatch Keys]
+  // Alias dispatch keys are synthetic dispatch keys which map to multiple
+  // runtime dispatch keys. Alias keys have precedence, but they are always
+  // lower precedence than runtime keys. You can register a kernel to an
+  // alias key; the kernel may then be populated into the mapped runtime keys
+  // during dispatch table computation.
+  // If a runtime dispatch key has multiple kernels from alias keys, which
+  // kernel wins is decided based on the precedence of alias keys (but runtime
+  // keys always have precedence over alias keys).
+  // Alias keys won't be directly called during runtime.
+
+  // See Note [Alias Dispatch Key : Autograd]
+  Autograd,
+  CompositeImplicitAutograd, // registered at
+  // build/aten/src/ATen/RegisterCompositeImplicitAutograd.cpp
+
+  // Note: The alias keyset for FuncTorchBatchedDecomposition is disjoint
+  // from all other alias keysets
+  // and so precedence order doesn't matter
+  FuncTorchBatchedDecomposition, // registered at
+  // build/aten/src/ATen/RegisterFuncTorchBatchedDecomposition.cpp
+  // Note: The alias keyset for CompositeImplicitAutogradNestedTensor is
+  // disjoint from all other alias keysets
+  CompositeImplicitAutogradNestedTensor, // registered at
+  // build/aten/src/ATen/RegisterCompositeImplicitAutogradNestedTensor.cpp
+  CompositeExplicitAutograd, // registered at
+  // build/aten/src/ATen/RegisterCompositeExplicitAutograd.cpp
+  // See Note [CompositeExplicitAutogradNonFunctional Key]
+  CompositeExplicitAutogradNonFunctional, // registered at
+  // build/aten/src/ATen/RegisterCompositeExplicitAutograd.cpp
+
+  // Define an alias key to represent end of alias dispatch keys.
+  // If you add new alias keys after Autograd, please also update it here.
+ StartOfAliasKeys = Autograd, + EndOfAliasKeys = CompositeExplicitAutogradNonFunctional, // + + // ~~~~~~~~~~~~~~~~~~~~~~~~~ BC ALIASES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // + // The aliases exist for backwards compatibility reasons, they shouldn't + // be used + CPUTensorId = CPU, + CUDATensorId = CUDA, + DefaultBackend = CompositeExplicitAutograd, + PrivateUse1_PreAutograd = AutogradPrivateUse1, + PrivateUse2_PreAutograd = AutogradPrivateUse2, + PrivateUse3_PreAutograd = AutogradPrivateUse3, + Autocast = AutocastCUDA, +}; + +// Note [Private use DispatchKey] +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +// Private use tensor IDs are preallocated tensor type IDs for use in user +// applications. Similar to private use fields in HTTP, they can be used +// by end users for experimental or private applications, without needing +// to "standardize" the tensor ID (which would be done by submitting a PR +// to PyTorch to add your type ID). +// +// Private use tensor IDs are appropriate to use if you want to experiment +// with adding a new tensor type (without having to patch PyTorch first) or +// have a private, non-distributed application that needs to make use of a +// new tensor type. Private use tensor IDs are NOT appropriate to use for +// libraries intended to be distributed to further users: please contact +// the PyTorch developers to get a type ID registered in this case. +// +// We provide two classes of private user tensor id: regular DispatchKeys +// and Autograd DispatchKeys. DispatchKeys serve the role of ordinary "backend" +// DispatchKeys; if you were adding support for a new type of accelerator, you +// would use a backend DispatchKey, and ideally automatically reuse +// AutogradOther definitions already defined in PyTorch. AutogradPrivateUse +// DispatchKeys serve as "wrapper" DispatchKeys: they are only necessary for +// tensors that compose multiple internal tensors, and for cases when the +// built-in autograd formulas for operators are not appropriate. + +static_assert( + (static_cast(BackendComponent::EndOfBackendKeys) + + static_cast(DispatchKey::EndOfFunctionalityKeys)) <= 64, + "The BackendComponent and DispatchKey enums (below EndOfFunctionalityKeys)" + " both map to backend and functionality bits" + " into a 64-bit bitmask; you must have less than 64 total entries between them"); + +// Check if a DispatchKey is an alias mapping to other runtime keys. +constexpr bool isAliasDispatchKey(DispatchKey k) { + return k >= DispatchKey::StartOfAliasKeys && k <= DispatchKey::EndOfAliasKeys; +} + +// [Note: Per-Backend Functionality Dispatch Keys] +// Check if a DispatchKey is a per-backend functionality key +// Any functionalities that can be customized per-backend should be added here. +// These keys correspond to functionalities that can be customized individually +// per backend. While they only take up one bit in the `DispatchKeySet` bitset, +// they map to (# backends) slots in the operator table. +// Each of these keys also has a separate set of "runtime keys" in the dispatch +// key enum, per backend, which *do* map to the individual operator table slots. +// For example, the "Sparse" key maps to an individual bit in the +// DispatchKeySet, while `SparseCPU`, `SparseCUDA`, etc all map to individual +// slots in the runtime operator table. 
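+// As a concrete sketch of that mapping (illustrative only; the helpers
+// referenced here are defined below in this file):
+//   isPerBackendFunctionalityKey(DispatchKey::Sparse); // true
+//   toRuntimePerBackendFunctionalityKey(
+//       DispatchKey::Sparse, BackendComponent::CUDABit);
+//   // == DispatchKey::SparseCUDA, the runtime instance of "Sparse" for CUDA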
+
+constexpr bool isPerBackendFunctionalityKey(DispatchKey k) {
+  if (k == DispatchKey::Dense || k == DispatchKey::Quantized ||
+      k == DispatchKey::Sparse || k == DispatchKey::SparseCsr ||
+      k == DispatchKey::AutogradFunctionality ||
+      k == DispatchKey::NestedTensor) {
+    return true;
+  } else {
+    return false;
+  }
+}
+
+// Note that this includes Undefined in the total count.
+// BUT EndOfFunctionalityKeys is its own (placeholder) key.
+// e.g. Undefined=0, Dense=1, Sparse=2, EndOfFunctionalityKeys=3.
+// In the above example, there are 3 total functionality keys.
+constexpr uint8_t num_functionality_keys =
+    static_cast<uint8_t>(DispatchKey::EndOfFunctionalityKeys);
+
+constexpr uint8_t num_backends =
+    static_cast<uint8_t>(BackendComponent::EndOfBackendKeys);
+
+// Note [No More Than 16 Backends]
+// Search for this note to find places in the code where the "no more than 16
+// backends" invariant is baked in.
+static_assert(
+    static_cast<uint8_t>(BackendComponent::EndOfBackendKeys) <= 16,
+    "BackendComponent currently only supports <= 16 backends. If we really need to extend this, \
+there are a few places where this invariant is baked in");
+
+constexpr uint8_t numPerBackendFunctionalityKeys() {
+  uint8_t count = 0;
+  for (uint8_t k = 0; k <= num_functionality_keys; ++k) {
+    if (isPerBackendFunctionalityKey(static_cast<DispatchKey>(k)))
+      ++count;
+  }
+  return count;
+}
+
+#if defined(C10_MOBILE_TRIM_DISPATCH_KEYS)
+// See [Note: Trimmed Mobile Dispatch Keys]
+constexpr uint16_t num_runtime_entries = 8;
+#else
+constexpr uint16_t num_runtime_entries = num_functionality_keys +
+    (numPerBackendFunctionalityKeys() * (num_backends - 1));
+#endif
+
+// See Note [No More Than 16 Backends]
+constexpr uint64_t full_backend_mask =
+    (static_cast<uint64_t>(1) << num_backends) - 1;
+
+C10_API const char* toString(DispatchKey);
+C10_API const char* toString(BackendComponent);
+C10_API std::ostream& operator<<(std::ostream&, DispatchKey);
+C10_API std::ostream& operator<<(std::ostream&, BackendComponent);
+
+C10_API DispatchKey getAutogradKeyFromBackend(BackendComponent k);
+
+// Parses a string into a dispatch key.
+// If the string cannot be correctly parsed, throws an exception.
+C10_API c10::DispatchKey parseDispatchKey(const std::string& k);
+
+// These are some convenience identifiers for dispatch keys which are
+// shorter to type than their long counterparts. Note that some of these
+// dispatch keys directly correspond to DeviceType; and most APIs that
+// accept DispatchKey also accept DeviceType; e.g.,
+// torch::dispatch(torch::kCPU, ...) is also valid.
+constexpr DispatchKey kAutograd = DispatchKey::Autograd;
+
+// See Note [The Ordering of Per-Backend Dispatch Keys Matters!]
+// This function relies on the invariant that the dispatch keys between
+// StartOfDenseBackends and EndOfRuntimeBackendKeys are ordered by backend
+// in the same order as `BackendComponent`.
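+// For example (sketch): toBackendComponent(DispatchKey::SparseCUDA) ==
+// BackendComponent::CUDABit, because SparseCUDA sits at the same offset
+// within the Sparse block as CUDA does within the Dense block.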
+constexpr BackendComponent toBackendComponent(DispatchKey k) {
+  if (k >= DispatchKey::StartOfDenseBackends &&
+      k <= DispatchKey::EndOfDenseBackends) {
+    return static_cast<BackendComponent>(
+        static_cast<uint8_t>(k) -
+        static_cast<uint8_t>(DispatchKey::StartOfDenseBackends));
+  } else if (
+      k >= DispatchKey::StartOfQuantizedBackends &&
+      k <= DispatchKey::EndOfQuantizedBackends) {
+    return static_cast<BackendComponent>(
+        static_cast<uint8_t>(k) -
+        static_cast<uint8_t>(DispatchKey::StartOfQuantizedBackends));
+  } else if (
+      k >= DispatchKey::StartOfSparseBackends &&
+      k <= DispatchKey::EndOfSparseBackends) {
+    return static_cast<BackendComponent>(
+        static_cast<uint8_t>(k) -
+        static_cast<uint8_t>(DispatchKey::StartOfSparseBackends));
+  } else if (
+      k >= DispatchKey::StartOfSparseCsrBackends &&
+      k <= DispatchKey::EndOfSparseCsrBackends) {
+    return static_cast<BackendComponent>(
+        static_cast<uint8_t>(k) -
+        static_cast<uint8_t>(DispatchKey::StartOfSparseCsrBackends));
+  } else if (
+      k >= DispatchKey::StartOfNestedTensorBackends &&
+      k <= DispatchKey::EndOfNestedTensorBackends) {
+    return static_cast<BackendComponent>(
+        static_cast<uint8_t>(k) -
+        static_cast<uint8_t>(DispatchKey::StartOfNestedTensorBackends));
+  } else if (
+      k >= DispatchKey::StartOfAutogradFunctionalityBackends &&
+      k <= DispatchKey::EndOfAutogradFunctionalityBackends) {
+    return static_cast<BackendComponent>(
+        static_cast<uint8_t>(k) -
+        static_cast<uint8_t>(
+            DispatchKey::StartOfAutogradFunctionalityBackends));
+  } else {
+    return BackendComponent::InvalidBit;
+  }
+}
+
+constexpr DispatchKey toFunctionalityKey(DispatchKey k) {
+  if (k <= DispatchKey::EndOfFunctionalityKeys) {
+    return k;
+  } else if (k <= DispatchKey::EndOfDenseBackends) {
+    return DispatchKey::Dense;
+  } else if (k <= DispatchKey::EndOfQuantizedBackends) {
+    return DispatchKey::Quantized;
+  } else if (k <= DispatchKey::EndOfSparseBackends) {
+    return DispatchKey::Sparse;
+  } else if (k <= DispatchKey::EndOfSparseCsrBackends) {
+    return DispatchKey::SparseCsr;
+  } else if (k <= DispatchKey::EndOfNestedTensorBackends) {
+    return DispatchKey::NestedTensor;
+  } else if (k <= DispatchKey::EndOfAutogradFunctionalityBackends) {
+    return DispatchKey::AutogradFunctionality;
+  } else {
+    return DispatchKey::Undefined;
+  }
+}
+
+BackendComponent toBackendComponent(DeviceType device_type);
+
+// Given (DispatchKey::Dense, BackendComponent::CUDABit), returns
+// DispatchKey::CUDA.
+// See Note [The Ordering of Per-Backend Dispatch Keys Matters!]
+// This function relies on the invariant that the dispatch keys between
+// StartOfDenseBackends and EndOfRuntimeBackendKeys are ordered by backend
+// in the same order as `BackendComponent`.
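+// A sanity property implied by this ordering (sketch):
+//   toBackendComponent(toRuntimePerBackendFunctionalityKey(
+//       DispatchKey::Sparse, BackendComponent::CPUBit))
+//   == BackendComponent::CPUBit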
+constexpr DispatchKey toRuntimePerBackendFunctionalityKey(
+    DispatchKey functionality_k,
+    BackendComponent backend_k) {
+  if (functionality_k == DispatchKey::Dense) {
+    return static_cast<DispatchKey>(
+        static_cast<uint8_t>(DispatchKey::StartOfDenseBackends) +
+        static_cast<uint8_t>(backend_k));
+  }
+  if (functionality_k == DispatchKey::Sparse) {
+    return static_cast<DispatchKey>(
+        static_cast<uint8_t>(DispatchKey::StartOfSparseBackends) +
+        static_cast<uint8_t>(backend_k));
+  }
+  if (functionality_k == DispatchKey::SparseCsr) {
+    return static_cast<DispatchKey>(
+        static_cast<uint8_t>(DispatchKey::StartOfSparseCsrBackends) +
+        static_cast<uint8_t>(backend_k));
+  }
+  if (functionality_k == DispatchKey::Quantized) {
+    return static_cast<DispatchKey>(
+        static_cast<uint8_t>(DispatchKey::StartOfQuantizedBackends) +
+        static_cast<uint8_t>(backend_k));
+  }
+  if (functionality_k == DispatchKey::NestedTensor) {
+    return static_cast<DispatchKey>(
+        static_cast<uint8_t>(DispatchKey::StartOfNestedTensorBackends) +
+        static_cast<uint8_t>(backend_k));
+  }
+  if (functionality_k == DispatchKey::AutogradFunctionality) {
+    return static_cast<DispatchKey>(
+        static_cast<uint8_t>(
+            DispatchKey::StartOfAutogradFunctionalityBackends) +
+        static_cast<uint8_t>(backend_k));
+  }
+  return DispatchKey::Undefined;
+}
+
+} // namespace c10
+
+namespace torch {
+// Expose the constant, but not the TYPE (DispatchKey is an implementation
+// detail!)
+// NOLINTNEXTLINE(misc-unused-using-decls)
+using c10::kAutograd;
+} // namespace torch
+
+// NB: You really shouldn't use this instance; this enum is guaranteed
+// to be pretty small so a regular array should be acceptable.
+namespace std {
+template <>
+struct hash<c10::DispatchKey> {
+  typedef size_t result_type;
+  typedef c10::DispatchKey argument_type;
+
+  size_t operator()(c10::DispatchKey x) const {
+    return static_cast<size_t>(x);
+  }
+};
+} // namespace std
diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/DispatchKeySet.h b/phivenv/Lib/site-packages/torch/include/c10/core/DispatchKeySet.h
new file mode 100644
index 0000000000000000000000000000000000000000..a72d289e02c08b7b765a6b3cc46e9ef8e93a17e5
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/c10/core/DispatchKeySet.h
@@ -0,0 +1,968 @@
+#pragma once
+#include <c10/core/DispatchKey.h>
+#include <c10/macros/Export.h>
+#include <c10/macros/Macros.h>
+#include <c10/util/Exception.h>
+#include <c10/util/Metaprogramming.h>
+#include <c10/util/TypeList.h>
+#include <c10/util/llvmMathExtras.h>
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <initializer_list>
+#include <iterator>
+#include <ostream>
+#include <string>
+#include <type_traits>
+
+namespace c10 {
+
+struct FunctionalityOffsetAndMask {
+  // empty constructor shouldn't be used; only needed to initialize
+  // the array before populating it.
+  FunctionalityOffsetAndMask() = default;
+  FunctionalityOffsetAndMask(uint16_t offset, uint16_t mask)
+      : offset(offset), mask(mask) {}
+  // This needs to be big enough to cover the size of the operator table.
+  uint16_t offset{};
+  // See Note [No More Than 16 Backends]
+  // This mask needs to be big enough to mask all of the backend bits.
+  // We probably don't ever want to have more than 16 backend bits, so
+  // uint16_t should be enough.
+  uint16_t mask{};
+};
+static_assert(
+    c10::num_runtime_entries < 65536,
+    "The dispatcher currently only supports up to 2^16 runtime entries");
+
+C10_API std::array<FunctionalityOffsetAndMask, num_functionality_keys>
+initializeFunctionalityOffsetsAndMasks();
+
+C10_ALWAYS_INLINE static const std::
+    array<FunctionalityOffsetAndMask, num_functionality_keys>&
+    offsetsAndMasks() {
+  static auto offsets_and_masks_ = initializeFunctionalityOffsetsAndMasks();
+  return offsets_and_masks_;
+}
+
+// A representation of a set of DispatchKeys. A DispatchKeySet contains both
+// "functionality" bits and "backend bits", and every tensor holds its own
+// DispatchKeySet.
The Dispatcher implements multiple dispatch by grabbing the
+// keyset on every input tensor, or'ing them together, and dispatching to a
+// specific piece of functionality. The functionality bits are *ordered*. When
+// multiple functionality bits are set, we use the highest priority
+// functionality. Similarly, multiple backend bits can theoretically be set if
+// you call an operator with multiple tensors from different devices (e.g. CPU
+// and CUDA), although support for mixed device dispatch is limited (the only
+// kernels that gracefully handle mixed device inputs for now are cuda kernels
+// that take in a scalar cpu tensor).
+
+// A representation of a set of DispatchKeys. A tensor may have multiple
+// tensor type ids, e.g., a Variable tensor can also be a CPU tensor; the
+// DispatchKeySet specifies what type ids apply. The internal representation is
+// as a 64-bit bit set (this means only 64 tensor type ids are supported).
+//
+// As mentioned above, DispatchKeys are ordered; thus, we can ask questions
+// like "what is the highest priority DispatchKey in the set"? (The set itself
+// is not ordered; two sets with the same ids will always have the ids ordered
+// in the same way.)
+//
+// Note [DispatchKeySet Internal Representation]
+// Internally, dispatch keys are packed into 64-bit DispatchKeySet objects
+// that get passed around at runtime.
+// However, there isn't necessarily a 1-to-1 mapping between bits in the
+// keyset and individual dispatch keys.
+//
+// First: why do we have this distinction, and why not map every dispatch key
+// directly to a bit? This is mostly because we have several types of
+// functionalities that different backends would like to customize. For
+// example, we have:
+// - "Dense":     CPU, CUDA, XLA, ... (~12 keys)
+// - "Sparse":    SparseCPU, SparseCUDA, ...
+// - "SparseCsr": SparseCsrCPU, SparseCsrCUDA, ...
+// - "Quantized": QuantizedCPU, QuantizedCUDA, QuantizedXLA, ...
+// - "Autograd":  AutogradCPU, AutogradCUDA, AutogradXLA, ...
+// The problem is that the total number of keys grows quadratically with [#
+// backends] x [# functionalities], making it very difficult to map each key
+// directly to a bit in a bitset without dramatically increasing the size of
+// the bitset over time.
+//
+// The two enums (BackendComponent and DispatchKey) can be divided roughly
+// into 5 categories.
+//
+// (1) "Building block" keys
+//    (a) backends: Everything in the BackendComponent enum (e.g. CPUBit,
+//        CUDABit)
+//    (b) functionalities: (per-backend) functionality-bit DispatchKeys (e.g.
+//        AutogradFunctionality, SparseCsr, Sparse, Dense)
+// (2) "Runtime" keys
+//    (a) "non-customizable backends" (e.g. FPGA)
+//    (b) "non-customizable functionalities" (e.g. Functionalize)
+//    (c) "per-backend instances of customizable functionalities" (e.g. CPU,
+//        SparseCPU, AutogradCPU)
+// (3) "Alias" DispatchKeys (see Note [Alias Dispatch Keys])
+//
+// (1) Building block keys always correspond to individual bits in a
+// DispatchKeySet. They can also be combined in a DispatchKeySet to form actual
+// runtime keys. e.g.
+//     auto dense_cpu_ks = DispatchKeySet({DispatchKey::CPUBit,
+//     DispatchKey::Dense});
+//     // The keyset has the runtime dense-cpu key.
+//     dense_cpu_ks.has(DispatchKey::CPU);
+//     // And it contains the building block keys too.
+//     dense_cpu_ks.has(DispatchKey::CPUBit);
+//     dense_cpu_ks.has(DispatchKey::Dense);
+//
+// Not every backend and not every functionality counts as a "building block
+// key".
This is mostly to give us more levers to pull in the design space. +// Backend keys and functionality keys that count as "building blocks" will +// contribute to a full cross product of functionality that can be overridden. +// +// For example, right now we have at least 12 "backend" building +// blocks (CPU, CUDA, XLA, ...) and at least 5 "functionality" +// building blocks (Dense, Sparse, SparseCsr, Quantized, +// AutogradFunctionality, ...). These keys together allow every +// dispatcher operator to be customized in up to 12*4 different +// ways. Each of those requires a slot in the operator table of every +// dispatcher operator. Not every piece of functionality necessarily +// needs to be customizable per-backend, and not every backend +// necessarily needs to be able to customize every type of +// functionality. +// +// +// (2) Every runtime key corresponds directly to a slot in an operator's runtime +// dispatch table, and you can directly register kernels to a runtime dispatch +// key. +// +// For per-backend functionalities like "Dense" or "AutogradFunctionality", +// you can think of the corresponding runtime dispatch keys as "instances" of +// that functionality, per backend. E.g. "CPU", "CUDA", "XLA", etc. are all +// runtime instances of the "Dense" building block key. + +// (2a) and (2b) are represented identically in the DispatchKeySet logic: +// - backend-agnostic functionalities (e.g. FuncTorchBatched) are NOT +// customizable per backend. +// In order to do so, we'd need to promote it to a per-backend functionality +// "building block" key. +// - non-customizable backends (e.g. FPGA) can NOT customize existing +// functionality like Sparse, Autograd, etc. +// In order to do so, we'd need to promote it to a backend "building block" +// key. +// +// In both cases, these keys directly correspond to runtime slots in the +// operator table. +// +// +// (3) "Alias" keys +// See Note [Alias Dispatch Keys] +// +// Final note: for anyone making future changes to the Dispatcher + +// DispatchKeySet internals, there's a closed PR with a basic +// python-implementation of the Dispatcher that might be useful in quickly +// testing out and validating changes. See it at +// https://github.com/pytorch/pytorch/pull/68743 + +// An undefined tensor is one with an empty tensor type set. +class DispatchKeySet final { + public: + enum Full { FULL }; + enum FullAfter { FULL_AFTER }; + enum Raw { RAW }; + + // NB: default constructor representation as zero is MANDATORY as + // use of DispatchKeySet in TLS requires this. + constexpr DispatchKeySet() = default; + + constexpr DispatchKeySet(Full) + : repr_((1ULL << (num_backends + num_functionality_keys - 1)) - 1) {} + + constexpr DispatchKeySet(FullAfter, DispatchKey t) + // LSB after t are OK, but not t itself. + // "functionalities" have a notion of ordering (e.g. Autograd > Sparse > + // Quantized > Dense). But backends don't really have an ordering. + // Therefore, we're enforcing that FullAfter can only be used on + // "functionality" keys. + : repr_( + (1ULL + << (num_backends + static_cast(toFunctionalityKey(t)) - + 1)) - + 1) { + *this = add(DispatchKey::PythonDispatcher); + } + + // Public version of DispatchKeySet(uint64_t) API; external users + // must be explicit when they do this! 
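+  // For example, a round-trip through the raw representation (sketch;
+  // `ks` and `saved_repr` are placeholder names for exposition):
+  //   uint64_t saved_repr = ks.raw_repr();
+  //   DispatchKeySet restored(DispatchKeySet::RAW, saved_repr);
+  //   // restored == ks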
+  constexpr DispatchKeySet(Raw, uint64_t x) : repr_(x) {}
+
+  constexpr explicit DispatchKeySet(BackendComponent k) {
+    if (k == BackendComponent::InvalidBit) {
+      repr_ = 0;
+    } else {
+      repr_ = 1ULL << (static_cast<uint8_t>(k) - 1);
+    }
+  }
+
+  constexpr explicit DispatchKeySet(DispatchKey k) {
+    // NOLINTNEXTLINE(bugprone-branch-clone)
+    if (k == DispatchKey::Undefined) {
+      // Case 1: handle Undefined specifically
+      repr_ = 0;
+    } else if (k <= DispatchKey::EndOfFunctionalityKeys) {
+      // Case 2: handle "functionality-only" keys
+      // These keys have a functionality bit set, but no backend bits
+      // These can technically be either:
+      // - valid runtime keys (e.g. DispatchKey::AutogradOther,
+      //   DispatchKey::FuncTorchBatched, etc)
+      // - "building block" keys that aren't actual runtime keys (e.g.
+      //   DispatchKey::Dense or Sparse)
+      uint64_t functionality_val = 1ULL
+          << (num_backends + static_cast<uint8_t>(k) - 1);
+      repr_ = functionality_val;
+    } else if (k <= DispatchKey::EndOfRuntimeBackendKeys) {
+      // Case 3: "runtime" keys that have a functionality bit AND a backend
+      // bit. First compute which bit to flip for the functionality.
+      auto functionality_k = toFunctionalityKey(k);
+      // The - 1 is because Undefined is technically a "functionality" that
+      // doesn't show up in the bitset. So e.g. Dense is technically the
+      // second functionality, but the lowest functionality bit.
+      uint64_t functionality_val = 1ULL
+          << (num_backends + static_cast<uint8_t>(functionality_k) - 1);
+
+      // then compute which bit to flip for the backend
+      // Case 4a: handle the runtime instances of "per-backend functionality"
+      // keys. For example, given DispatchKey::CPU, we should set:
+      // - the Dense functionality bit
+      // - the CPUBit backend bit
+      // first compute which bit to flip for the backend
+      auto backend_k = toBackendComponent(k);
+      uint64_t backend_val = backend_k == BackendComponent::InvalidBit
+          ? 0
+          : 1ULL << (static_cast<uint8_t>(backend_k) - 1);
+      repr_ = functionality_val + backend_val;
+    } else {
+      // At this point, we should have covered every case except for alias
+      // keys. Technically it would be possible to add alias dispatch keys to
+      // a DispatchKeySet, but the semantics are a little confusing and this
+      // currently isn't needed anywhere.
+      repr_ = 0;
+    }
+  }
+
+  constexpr uint64_t keys_to_repr(std::initializer_list<DispatchKey> ks) {
+    uint64_t repr = 0;
+    for (auto k : ks) {
+      repr |= DispatchKeySet(k).repr_;
+    }
+    return repr;
+  }
+
+  constexpr uint64_t backend_bits_to_repr(
+      std::initializer_list<BackendComponent> ks) {
+    uint64_t repr = 0;
+    for (auto k : ks) {
+      repr |= DispatchKeySet(k).repr_;
+    }
+    return repr;
+  }
+
+  explicit constexpr DispatchKeySet(std::initializer_list<DispatchKey> ks)
+      : repr_(keys_to_repr(ks)) {}
+
+  explicit constexpr DispatchKeySet(std::initializer_list<BackendComponent> ks)
+      // Note: for some reason, putting this logic directly in the constructor
+      // appears to fail to compile on CUDA 10.1.
+      // See an example internal failure at
+      // https://www.internalfb.com/intern/skycastle/run/76561193669136035/artifact/actionlog.76561193742069401.stderr
+      : repr_(backend_bits_to_repr(ks)) {}
+
+  // Test if a DispatchKey is in the set
+  inline bool has(DispatchKey t) const {
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(t != DispatchKey::Undefined);
+    return has_all(DispatchKeySet(t));
+  }
+  constexpr bool has_backend(BackendComponent t) const {
+    return has_all(DispatchKeySet(t));
+  }
+
+  // Given a DispatchKeySet of functionality keys and (potentially) backend
+  // keys, tests if all of them are in the current set.
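+  // For example (sketch):
+  //   DispatchKeySet(DispatchKey::CPU)
+  //       .has_all(DispatchKeySet(DispatchKey::Dense)); // true: CPU sets
+  //                                                     // Dense + CPUBit
+  //   DispatchKeySet(DispatchKey::CPU)
+  //       .has_all(DispatchKeySet(DispatchKey::CUDA)); // false: no CUDABit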
+ constexpr bool has_all(DispatchKeySet ks) const { + return static_cast((repr_ & ks.repr_) == ks.repr_); + } + + // Given a DispatchKeySet of functionality keys and (potentially) backend + // keys, tests if any of them are in the current set. This could technically + // be pretty easily implemented using has(). It is strictly a perf + // optimization though. There are many places in the code base where we want + // to test for multiple functionality keys together. HOWEVER, runtime + // per-backend functionality keys aren't allowed to be used with this + // function, because you can end up with weird results. e.g. + // DispatchKeySet(DispatchKey::AutogradCPU).has_any(DispatchKeySet(DispatchKey::CPU)) + // would return true. + inline bool has_any(DispatchKeySet ks) const { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + // Either there are no backend bits in the input keyset + ((ks.repr_ & full_backend_mask) == 0) || + // or there are no per-backend-functionality bits + // See [Note: Per-Backend Functionality Dispatch Keys] + ((ks & + DispatchKeySet({ + DispatchKey::Dense, + DispatchKey::Quantized, + DispatchKey::Sparse, + DispatchKey::SparseCsr, + DispatchKey::AutogradFunctionality, + }) + .repr_) == 0)); + return static_cast((repr_ & ks.repr_) != 0); + } + // Test if DispatchKeySet is a superset of ks. + bool isSupersetOf(DispatchKeySet ks) const { + return (repr_ & ks.repr_) == ks.repr_; + } + // Perform set union + constexpr DispatchKeySet operator|(DispatchKeySet other) const { + return DispatchKeySet(repr_ | other.repr_); + } + // Perform set intersection + constexpr DispatchKeySet operator&(DispatchKeySet other) const { + return DispatchKeySet(repr_ & other.repr_); + } + // Compute the set difference self - other, + // but ONLY for the functionality keys. + // Any backend bits set on self will remain unchanged. + // See Note [Removing keys from DispatchKeySet Only Affects Functionality + // Keys] + constexpr DispatchKeySet operator-(DispatchKeySet other) const { + return DispatchKeySet(repr_ & (full_backend_mask | ~other.repr_)); + } + + // Compute self ^ other + constexpr DispatchKeySet operator^(DispatchKeySet other) const { + return DispatchKeySet(repr_ ^ other.repr_); + } + bool operator==(DispatchKeySet other) const { + return repr_ == other.repr_; + } + bool operator!=(DispatchKeySet other) const { + return repr_ != other.repr_; + } + // Add a DispatchKey to the DispatchKey set. Does NOT mutate, + // returns the extended DispatchKeySet! + [[nodiscard]] constexpr DispatchKeySet add(DispatchKey t) const { + return *this | DispatchKeySet(t); + } + [[nodiscard]] constexpr DispatchKeySet add(DispatchKeySet ks) const { + return *this | ks; + } + + // Remove a DispatchKey from the DispatchKey set. + // This is generally not an operation you should be doing + // (it's used to implement the printing overload, operator<<) + // + // Note [Removing keys from DispatchKeySet Only Affects Functionality Keys] + // Only functionality bits are allowed to be removed from a keyset. + // For now, we're only allowing removal of "functionality bits" from the + // keyset, which is specifically needed by the fallthrough key calculation + // logic. Why is removing backend bits problematic? Consider this example: + // + // DispatchKeySet([DispatchKey.CPU, DispatchKey.AutogradCUDA, + // DispatchKey.CUDA]).remove(DispatchKey.AutogradCUDA) + // DispatchKeySet([DispatchKey.CPU, + // DispatchKey.AutogradCUDA]).remove(DispatchKey.AutogradCUDA) + // + // What do we want to happen? 
+ // Technically, we'd like it to be true that after removal, + // the first keyset still has the CUDA dispatch key while the second doesn't. + // Unfortunately there's no way to represent that, because the two keysets are + // represented the same way internally: functionality bits: Autograd, Dense + // backend bits: CPU, CUDA + // + // Instead, remove(DispatchKey.AutogradCPU) will only remove the "Autograd" + // bit from the bitset. + [[nodiscard]] constexpr DispatchKeySet remove(DispatchKey t) const { + return DispatchKeySet( + repr_ & ~(DispatchKeySet(t).repr_ & ~full_backend_mask)); + } + // You're allowed to remove a backend bit from a DispatchKeySet, + // but you have to be explicit about it (remove_backend() instead of + // remove()). + constexpr DispatchKeySet remove_backend(BackendComponent b) const { + return DispatchKeySet(repr_ & ~(DispatchKeySet(b).repr_)); + } + // Is the set empty? (AKA undefined tensor) + bool empty() const { + return repr_ == 0; + } + uint64_t raw_repr() const { + return repr_; + } + + static DispatchKeySet from_raw_repr(uint64_t x) { + return DispatchKeySet(RAW, x); + } + + DispatchKey highestFunctionalityKey() const { + auto functionality_idx = indexOfHighestBit(); + // This means that none of the functionality bits were set. + if (functionality_idx < num_backends) + return DispatchKey::Undefined; + // The first num_backend bits in the keyset don't correspond to real + // dispatch keys. + return static_cast(functionality_idx - num_backends); + } + + // This is similar like toBackendComponent(DispatchKey), but less restrictive. + // toBackendComponent() errors out if the key that it was passed has no + // backend bits, which is useful for error checking. We need a version of that + // here that can also handle "fake" backends like FPGA, because they need to + // map to the AutogradOther key. For those backends, we return + // BackendComponent::InvalidBit. + BackendComponent highestBackendKey() const { + // mask to mask out functionality bits + auto backend_idx = + DispatchKeySet(repr_ & full_backend_mask).indexOfHighestBit(); + // all zeros across the backend bits means that no backend bits are set. + if (backend_idx == 0) + return BackendComponent::InvalidBit; + return static_cast(backend_idx); + } + + // returns the DispatchKey of highest priority in the set. + DispatchKey highestPriorityTypeId() const { + auto functionality_k = highestFunctionalityKey(); + if (isPerBackendFunctionalityKey(functionality_k)) { + return toRuntimePerBackendFunctionalityKey( + functionality_k, highestBackendKey()); + } + return functionality_k; + } + + // Returns the index of the most-significant bit in the keyset. + // This is used to as part of the calculation into the operator table to get: + // - the highest "functionality" bit in the keyset. + // - the highest "backend" bit in the keyset. + uint8_t indexOfHighestBit() const { + return 64 - llvm::countLeadingZeros(repr_); + } + +#if defined(C10_MOBILE_TRIM_DISPATCH_KEYS) + // [Note: Trimmed Mobile Dispatch Keys] + /** + * The method below maps the dispatch key in the enum DispatchKey to an + * integer index in the dispatchTable_ array in OperatorEntry. The array + * is trimmed for mobile to reduce peak memory usage since it's + * unnecessary to reserve additional space for dispatch keys that will + * never be used on mobile. 
+ */ + int getDispatchTableIndexForDispatchKeySet() const { + auto dk = highestPriorityTypeId(); + switch (dk) { + case DispatchKey::Undefined: + return 0; + case DispatchKey::CPU: + return 1; + case DispatchKey::QuantizedCPU: + return 2; + case DispatchKey::SparseCPU: + return 3; + case DispatchKey::BackendSelect: + return 4; + case DispatchKey::ADInplaceOrView: + return 5; + case DispatchKey::AutogradOther: + return 6; + case DispatchKey::AutogradCPU: + return 7; + default: + return -1; + } + } +#else + // returns the index in the operator table of highest priority key in the the + // keyset Note that we could in theory implement this using + // highestPriorityTypeId(), but this code is very hotpath and we can do it + // faster without it. + int getDispatchTableIndexForDispatchKeySet() const { + auto functionality_idx = + DispatchKeySet(repr_ >> num_backends).indexOfHighestBit(); + auto offset_and_mask = offsetsAndMasks()[functionality_idx]; + // Mask the functionality bits out first, then right-shift by 1. + // right-shifting by 1 because everything is zero-indexed. + // E.g. 000001 (CPU) should give us an offset of 0, 000010 (CUDA) should + // give us an offset of 1, etc. + auto backend_idx = + DispatchKeySet((repr_ & offset_and_mask.mask) >> 1).indexOfHighestBit(); + return offset_and_mask.offset + backend_idx; + } +#endif + + // returns the "index" of the highest priority backend in the keyset. + // This is pretty similar to getBackendKey(), but: + // - It's hotpath code (part of the runtime bitset calculation) + // - I's returns an integer index, not an enum value + // - Everything is shifted to the right by 1. + // BackendComponent::InvalidBit is technically the lowest enum value, + // but it isn't included in the runtime table. So CPUBit = 1, CUDABit = 2, + // etc. + uint64_t getBackendIndex() const { + return DispatchKeySet((repr_ & full_backend_mask) >> 1).indexOfHighestBit(); + } + + private: + constexpr DispatchKeySet(uint64_t repr) : repr_(repr) {} + uint64_t repr_ = 0; + + public: + // STL iterator for DispatchKeySet. Iterates through all runtime DispatchKeys + // in the set. The iterator is only invalidated by the destruction of the + // underlying DispatchKeySet as the iterator stores a pointer to the raw + // representation of the DispatchKeySet. Note: When we encounter a per-backend + // functionality (e.g. Dense or Sparse), we will iterate through EVERY backend + // in the keyset, for that functionality. For example, if the next + // functionality key to iterate over is Autograd, and the backend bits in the + // keyset correspond to [BackendComponent::CPUBit, BackendComponent::CUDABit], + // then the next two keys we return will be DispatchKey::AutogradCPU, + // DispatchKey::AutogradCUDA (CPU first because it has lower precedence than + // CUDA in DispatchKey.h). + class iterator { + public: + using self_type = iterator; + using iterator_category = std::input_iterator_tag; + using value_type = DispatchKey; + using difference_type = ptrdiff_t; + using reference = value_type&; + using pointer = value_type*; + // final mask value should mask out the entire keyset + static const uint8_t end_iter_mask_val = + num_backends + num_functionality_keys; + // final key value should be the last DispatchKey + static const uint8_t end_iter_key_val = num_functionality_keys; + + // current_dispatchkey_idx_ will iterate through all functionality bits. + // current_backendcomponent_idx_ will iterate through all backend bits. 
+ explicit iterator( + const uint64_t* data_ptr, + uint8_t next_functionality = num_backends, + uint8_t next_backend = 0) + : data_ptr_(data_ptr), + next_functionality_(next_functionality), + next_backend_(next_backend), + // These are in an invalid state at construction time, and set by the + // first increment call + current_dispatchkey_idx_(end_iter_key_val), + current_backendcomponent_idx_(end_iter_key_val) { + // Go to the first key in the set + TORCH_INTERNAL_ASSERT( + next_functionality_ >= num_backends, + "num_backends=", + static_cast(num_backends), + "next_functionality_=", + static_cast(next_functionality_)); + ++(*this); + } + + C10_API self_type& operator++(); + + self_type operator++(int) { + self_type previous_iterator = *this; + ++(*this); + return previous_iterator; + } + + bool operator==(const self_type& rhs) const { + return next_functionality_ == rhs.next_functionality_ && + current_dispatchkey_idx_ == rhs.current_dispatchkey_idx_ && + next_backend_ == rhs.next_backend_ && + current_backendcomponent_idx_ == rhs.current_backendcomponent_idx_; + } + bool operator!=(const self_type& rhs) const { + return next_functionality_ != rhs.next_functionality_ || + current_dispatchkey_idx_ != rhs.current_dispatchkey_idx_ || + next_backend_ != rhs.next_backend_ || + current_backendcomponent_idx_ != rhs.current_backendcomponent_idx_; + } + DispatchKey operator*() const { + auto functionality_key = + static_cast(current_dispatchkey_idx_); + if (isPerBackendFunctionalityKey(functionality_key)) { + auto next_key = toRuntimePerBackendFunctionalityKey( + functionality_key, + static_cast(current_backendcomponent_idx_)); + // We expect all of the Dense, Sparse, Quantized, and Autograd keys to + // be ordered the same way with respect to their backends + TORCH_INTERNAL_ASSERT( + toBackendComponent(next_key) == + static_cast(current_backendcomponent_idx_), + "Tried to map functionality key ", + toString(functionality_key), + " and backend bit ", + toString( + static_cast(current_backendcomponent_idx_)), + " to a runtime key, but ended up with ", + toString(next_key), + ". This can happen if the order of the backend dispatch keys in DispatchKey.h isn't consistent.", + " Please double check that enum for inconsistencies."); + return next_key; + } else { + return functionality_key; + } + } + + private: + const uint64_t* data_ptr_; + uint8_t next_functionality_; + uint8_t next_backend_; + uint8_t current_dispatchkey_idx_; + uint8_t current_backendcomponent_idx_; + }; + + public: + // Returns iterator to the first key in the set. If no keys are in the + // set, then will return the end iterator. + iterator begin() const { + return iterator(&repr_); + } + + // We do not need to iterate beyond EndOfFunctionalityKeys so we will treat + // this as the end iterator. + iterator end() const { + return iterator(&repr_, iterator::end_iter_mask_val); + } +}; + +C10_API std::string toString(DispatchKeySet); +C10_API std::ostream& operator<<(std::ostream&, DispatchKeySet); + +C10_API inline int getDispatchTableIndexForDispatchKey(DispatchKey k) { + return DispatchKeySet(k).getDispatchTableIndexForDispatchKeySet(); +} + +// Alias key DispatchKey::Autograd maps to +// (autograd_dispatch_keyset x full_backend_mask) +// NB: keys in this set also get associated with CompositeImplicitAutograd +// +// Note [autograd_dispatch_keyset Does Not Include Backend Bits] +// We don't want to include any backend bits (BackendComponent::CPUBit, etc) +// directly in autograd_dispatch_keyset. +// Why? 
keysets like autograd_dispatch_keyset are commonly used to remove +// autograd keys from a DispatchKeySet throughout the code base. However, you +// are only allowed to remove functionality bits from a keyset, not backend +// bits. See Note [Removing keys from DispatchKeySet Only Affects Functionality +// Keys] for details. To be consistent and avoid confusion, we're explicitly +// setting up autograd_dispatch_keyset to not have any backend bits. +constexpr DispatchKeySet autograd_dispatch_keyset = DispatchKeySet({ + DispatchKey::AutogradFunctionality, + DispatchKey::AutogradOther, + DispatchKey::AutogradNestedTensor, +}); + +constexpr DispatchKeySet autocast_dispatch_keyset = DispatchKeySet({ + DispatchKey::AutocastCPU, + DispatchKey::AutocastMPS, + DispatchKey::AutocastCUDA, + DispatchKey::AutocastXPU, + DispatchKey::AutocastIPU, + DispatchKey::AutocastHPU, + DispatchKey::AutocastXLA, + DispatchKey::AutocastPrivateUse1, + DispatchKey::AutocastMTIA, + DispatchKey::AutocastMAIA, +}); + +// See Note [TLS Initialization] +constexpr DispatchKeySet default_included_set = DispatchKeySet({ + DispatchKey::BackendSelect, + DispatchKey::ADInplaceOrView, +}); + +constexpr DispatchKeySet default_excluded_set = DispatchKeySet({ + DispatchKey::AutocastCPU, + DispatchKey::AutocastMPS, + DispatchKey::AutocastCUDA, + DispatchKey::AutocastXPU, + DispatchKey::AutocastIPU, + DispatchKey::AutocastHPU, + DispatchKey::AutocastXLA, + DispatchKey::AutocastPrivateUse1, + DispatchKey::AutocastMTIA, + DispatchKey::AutocastMAIA, +}); + +constexpr DispatchKeySet autograd_dispatch_keyset_with_ADInplaceOrView = + autograd_dispatch_keyset | DispatchKeySet(DispatchKey::ADInplaceOrView); + +constexpr DispatchKeySet python_ks = DispatchKeySet({ + DispatchKey::Python, + DispatchKey::PythonTLSSnapshot, +}); + +constexpr DispatchKeySet sparse_ks = DispatchKeySet(DispatchKey::Sparse); + +constexpr DispatchKeySet sparse_csr_ks = DispatchKeySet(DispatchKey::SparseCsr); + +constexpr DispatchKeySet mkldnn_ks = DispatchKeySet(DispatchKey::MkldnnCPU); + +// backend dispatch keys that map to DispatchKey::AutogradOther +// NB: keys in this set also get associated with CompositeImplicitAutograd +constexpr DispatchKeySet autogradother_backends = + DispatchKeySet( + // HIP and VE aren't in this list: they now have their own backend bits + // which means that they can now have their own Autograd keys. + // Technically, HIP will now redispatch to its own custom AutogradHIP + // slot in the runtime table. + {DispatchKey::FPGA, + DispatchKey::Vulkan, + DispatchKey::Metal, + DispatchKey::CustomRNGKeyId, + DispatchKey::MkldnnCPU, + // Sparse and Quantized backends also live here. + DispatchKey::Sparse, + DispatchKey::SparseCsr, + DispatchKey::Quantized}) + // Including the backend bits because this keyset is used during op + // registration, which requires looping over all runtime autogradother + // backend keys. + | DispatchKeySet(DispatchKeySet::RAW, full_backend_mask); + +// The set of dispatch keys that come after autograd +// n.b. 
this relies on the fact that AutogradOther is currently the lowest +// Autograd key +constexpr DispatchKeySet after_autograd_keyset = + DispatchKeySet(DispatchKeySet::FULL_AFTER, c10::DispatchKey::AutogradOther); + +// The set of dispatch keys that come after ADInplaceOrView +constexpr DispatchKeySet after_ADInplaceOrView_keyset = DispatchKeySet( + DispatchKeySet::FULL_AFTER, + c10::DispatchKey::ADInplaceOrView); + +// The set of dispatch keys that come after Functionalize +constexpr DispatchKeySet after_func_keyset = + DispatchKeySet(DispatchKeySet::FULL_AFTER, c10::DispatchKey::Functionalize) + .remove( + // NOTE: we also need to remove ADInplaceOrView from the keyset when + // redispatching after the func kernels. This is because we're not + // calling the same op; we originally called an inplace op, and now + // we aren't. The original key calculation figured out which keys + // were Fallthrough based on the inplace op. That means that it did + // not include the ADInPlaceOrView kernel as a fallthrough key. + // However, we WANT the ADInPlaceOrView kernel to be ignored now + // that we're calling an out-of-place op. Re-invoking + // Dispatcher::call would re-run the Fallthrough key calculation and + // get us that, But at::redispatch is more performant. We can get + // away with it by explicitly removing the key here. + c10::DispatchKey::ADInplaceOrView); + +constexpr DispatchKeySet backend_bitset_mask = + DispatchKeySet(DispatchKeySet::RAW, (1ULL << num_backends) - 1); + +constexpr auto inplace_or_view_ks = + DispatchKeySet(DispatchKey::ADInplaceOrView); +constexpr auto autograd_cpu_ks = DispatchKeySet(DispatchKey::AutogradCPU); +constexpr auto autograd_ipu_ks = DispatchKeySet(DispatchKey::AutogradIPU); +constexpr auto autograd_mtia_ks = DispatchKeySet(DispatchKey::AutogradMTIA); +constexpr auto autograd_maia_ks = DispatchKeySet(DispatchKey::AutogradMAIA); +constexpr auto autograd_xpu_ks = DispatchKeySet(DispatchKey::AutogradXPU); +constexpr auto autograd_cuda_ks = DispatchKeySet(DispatchKey::AutogradCUDA); +constexpr auto autograd_xla_ks = DispatchKeySet(DispatchKey::AutogradXLA); +constexpr auto autograd_lazy_ks = DispatchKeySet(DispatchKey::AutogradLazy); +constexpr auto autograd_meta_ks = DispatchKeySet(DispatchKey::AutogradMeta); +constexpr auto autograd_mps_ks = DispatchKeySet(DispatchKey::AutogradMPS); +constexpr auto autograd_hpu_ks = DispatchKeySet(DispatchKey::AutogradHPU); +constexpr auto autograd_privateuse1_ks = + DispatchKeySet(DispatchKey::AutogradPrivateUse1); +constexpr auto autograd_privateuse2_ks = + DispatchKeySet(DispatchKey::AutogradPrivateUse2); +constexpr auto autograd_privateuse3_ks = + DispatchKeySet(DispatchKey::AutogradPrivateUse3); +constexpr auto autograd_other_ks = DispatchKeySet(DispatchKey::AutogradOther); +constexpr auto autograd_nested = + DispatchKeySet(DispatchKey::AutogradNestedTensor); +// keyset corresponding to functorch keys that have their own dedicated +// TensorImpl subclass. 
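+// For example (sketch), a tensor `t` (named here only for exposition) is
+// functorch-wrapped iff its keyset intersects the set defined below:
+//   bool wrapped = !(t.key_set() & functorch_transforms_ks).empty();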
+constexpr auto functorch_transforms_ks = DispatchKeySet( + {DispatchKey::FuncTorchBatched, + DispatchKey::FuncTorchVmapMode, + DispatchKey::Batched, + DispatchKey::VmapMode, + DispatchKey::FuncTorchGradWrapper}); + +constexpr auto functorch_batched_ks = + DispatchKeySet({DispatchKey::FuncTorchBatched}); + +// This keyset has: +// (1) the functionality bits corresponding to backends (dense, sparse, +// quantized) (2) all of the backend bits set +constexpr DispatchKeySet backend_functionality_keys = + DispatchKeySet({ + DispatchKey::Dense, + DispatchKey::Quantized, + DispatchKey::Sparse, + DispatchKey::SparseCsr, + }) | + DispatchKeySet(DispatchKeySet::RAW, full_backend_mask); + +struct OpTableOffsetAndMask { + uint16_t offset; + uint16_t backend_mask; +}; + +static_assert( + num_backends <= 16, + "Right now we expect the number of backends not to exceed 16. In the (unlikely) event" + " that this changes, the size of OpTableOffsetAndMask::backend_mask needs to be increased too."); + +// true if t is a backend dispatch key +C10_API bool isBackendDispatchKey(DispatchKey t); + +// Resolve alias dispatch key to DispatchKeySet if applicable +C10_API DispatchKeySet getRuntimeDispatchKeySet(DispatchKey t); + +// Resolve alias dispatch key to DispatchKeySet if applicable, +// and check if k is a part of that set +C10_API bool runtimeDispatchKeySetHas(DispatchKey t, DispatchKey k); + +// Returns a DispatchKeySet of all backend keys mapped to Autograd dispatch key +// t, DispatchKeySet is empty if t is not alias of DispatchKey::Autograd. +C10_API DispatchKeySet getBackendKeySetFromAutograd(DispatchKey t); + +// Returns a DispatchKeySet of autograd related keys mapped to backend. +// for a given backend key, use the associated autograd key. +// for non-backend keys, use AutogradOther as a default. +// Note: it's convenient and fast to return a default here rather than (say) +// returning an std::optional, or throwing. But it makes callers +// responsible for either a) enforcing the invariant that only backend keys +// be passed as arguments, or b) interpreting our return value carefully. +inline DispatchKeySet getAutogradRelatedKeySetFromBackend(BackendComponent t) { + switch (t) { + case BackendComponent::CPUBit: + return inplace_or_view_ks | autograd_cpu_ks; + case BackendComponent::IPUBit: + return inplace_or_view_ks | autograd_ipu_ks; + case BackendComponent::MTIABit: + return inplace_or_view_ks | autograd_mtia_ks; + case BackendComponent::MAIABit: + return inplace_or_view_ks | autograd_maia_ks; + case BackendComponent::XPUBit: + return inplace_or_view_ks | autograd_xpu_ks; + case BackendComponent::CUDABit: + return inplace_or_view_ks | autograd_cuda_ks; + case BackendComponent::XLABit: + return inplace_or_view_ks | autograd_xla_ks; + case BackendComponent::LazyBit: + return inplace_or_view_ks | autograd_lazy_ks; + case BackendComponent::MetaBit: + return inplace_or_view_ks | autograd_meta_ks; + case BackendComponent::MPSBit: + return inplace_or_view_ks | autograd_mps_ks; + case BackendComponent::HPUBit: + return inplace_or_view_ks | autograd_hpu_ks; + case BackendComponent::PrivateUse1Bit: + return inplace_or_view_ks | autograd_privateuse1_ks; + case BackendComponent::PrivateUse2Bit: + return inplace_or_view_ks | autograd_privateuse2_ks; + case BackendComponent::PrivateUse3Bit: + return inplace_or_view_ks | autograd_privateuse3_ks; + default: + return inplace_or_view_ks | autograd_other_ks; + } +} + +// Returns a DispatchKeySet of autocast related keys mapped to backend. 
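+// Mirrors getAutogradRelatedKeySetFromBackend above, except that backends
+// with no autocast kernel fall through to the empty set. A sketch of the
+// contract (illustrative only):
+//
+//   getAutocastRelatedKeySetFromBackend(BackendComponent::CUDABit)
+//       == DispatchKeySet(DispatchKey::AutocastCUDA) // true
+//   getAutocastRelatedKeySetFromBackend(BackendComponent::LazyBit)
+//       == DispatchKeySet() // true: Lazy has no autocast key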
+inline DispatchKeySet getAutocastRelatedKeySetFromBackend(BackendComponent t) { + constexpr auto autocast_cpu_ks = DispatchKeySet(DispatchKey::AutocastCPU); + constexpr auto autocast_mtia_ks = DispatchKeySet(DispatchKey::AutocastMTIA); + constexpr auto autocast_maia_ks = DispatchKeySet(DispatchKey::AutocastMAIA); + constexpr auto autocast_xpu_ks = DispatchKeySet(DispatchKey::AutocastXPU); + constexpr auto autocast_ipu_ks = DispatchKeySet(DispatchKey::AutocastIPU); + constexpr auto autocast_hpu_ks = DispatchKeySet(DispatchKey::AutocastHPU); + constexpr auto autocast_cuda_ks = DispatchKeySet(DispatchKey::AutocastCUDA); + constexpr auto autocast_xla_ks = DispatchKeySet(DispatchKey::AutocastXLA); + constexpr auto autocast_privateuse1_ks = + DispatchKeySet(DispatchKey::AutocastPrivateUse1); + constexpr auto autocast_mps_ks = DispatchKeySet(DispatchKey::AutocastMPS); + switch (t) { + case BackendComponent::CPUBit: + return autocast_cpu_ks; + case BackendComponent::MTIABit: + return autocast_mtia_ks; + case BackendComponent::MAIABit: + return autocast_maia_ks; + case BackendComponent::XPUBit: + return autocast_xpu_ks; + case BackendComponent::IPUBit: + return autocast_ipu_ks; + case BackendComponent::HPUBit: + return autocast_hpu_ks; + case BackendComponent::CUDABit: + return autocast_cuda_ks; + case BackendComponent::XLABit: + return autocast_xla_ks; + case BackendComponent::PrivateUse1Bit: + return autocast_privateuse1_ks; + case BackendComponent::MPSBit: + return autocast_mps_ks; + default: + return DispatchKeySet(); + } +} + +// returns the "backend" DispatchKey of highest priority in the set. +// This is basically like highestBackendKey(), except that we have some +// "functionality" bits that correspond to backends (Sparse, Quantized) +inline DispatchKey highestPriorityBackendTypeId(DispatchKeySet ks) { + return (ks & backend_functionality_keys).highestPriorityTypeId(); +} + +// This API exists because we have a use case for checking +// getRuntimeDispatchKeySet(alias).has(DispatchKey::Undefined) +// in OperatorEntry.cpp but we disallow it in has() API. +C10_API bool isIncludedInAlias(DispatchKey k, DispatchKey alias); + +// Historically, every tensor only had a single DispatchKey, and it was always +// something like CPU, and there wasn't any of this business where TLS +// could cause the DispatchKey of a tensor to change. But we still have some +// legacy code that is still using DispatchKey for things like instanceof +// checks; if at all possible, refactor the code to stop using DispatchKey in +// those cases. +inline DispatchKey legacyExtractDispatchKey(DispatchKeySet s) { + // NB: If you add any extra keys that can be stored in TensorImpl on + // top of existing "backend" keys like CPU/CUDA, you need to add it + // here. At the moment, autograd keys and ADInplaceOrView key need this + // treatment; + return (s - autograd_dispatch_keyset_with_ADInplaceOrView - + autocast_dispatch_keyset - + DispatchKeySet( + {DispatchKey::Functionalize, + DispatchKey::PythonTLSSnapshot, + DispatchKey::FuncTorchGradWrapper, + DispatchKey::FuncTorchVmapMode, + DispatchKey::FuncTorchBatched, + DispatchKey::Python})) + .highestPriorityTypeId(); +} + +template +using is_not_DispatchKeySet = std::negation>; + +// Given a function type, constructs a function_traits type that drops the first +// parameter type if the first parameter is of type DispatchKeySet. 
NB:
+// DispatchKeySet is currently explicitly hidden from JIT (mainly to avoid
+// pushing unnecessary arguments on the stack - see Note [Plumbing Keys Through
+// the Dispatcher] for details). If at any point in the future we need to expose
+// this type to JIT, revisit the usage of this type alias.
+template <class FuncType>
+using remove_DispatchKeySet_arg_from_func = guts::make_function_traits_t<
+    typename guts::infer_function_traits_t<FuncType>::return_type,
+    typename std::conditional_t<
+        std::is_same_v<
+            DispatchKeySet,
+            typename guts::typelist::head_with_default_t<
+                void,
+                typename guts::infer_function_traits_t<
+                    FuncType>::parameter_types>>,
+        guts::typelist::drop_if_nonempty_t<
+            typename guts::infer_function_traits_t<FuncType>::parameter_types,
+            1>,
+        typename guts::infer_function_traits_t<FuncType>::parameter_types>>;
+} // namespace c10
diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/DynamicCast.h b/phivenv/Lib/site-packages/torch/include/c10/core/DynamicCast.h
new file mode 100644
index 0000000000000000000000000000000000000000..65a5e4f3b66ff99829ac8089f07dd6eb5c97b699
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/c10/core/DynamicCast.h
@@ -0,0 +1,125 @@
+#pragma once
+
+#include <c10/core/ScalarType.h>
+#include <c10/macros/Macros.h>
+#include <c10/util/Load.h>
+#include <c10/util/TypeCast.h>
+
+namespace c10 {
+
+// Dynamic type casting utils:
+// - fetch_and_cast
+// - cast_and_store
+//
+// fetch_and_cast fetches a value whose dynamic type is specified by a
+// ScalarType from a void pointer and casts it to a static type.
+//
+// cast_and_store casts a statically typed value into the dynamic type
+// specified by a ScalarType, and stores it into a void pointer.
+//
+// NOTE:
+//
+// Dynamic casting allows us to support type promotion without blowing up
+// the combination space: For example, without dynamic cast, in order to
+// implement `add_` with type promotion, we would need something like
+//
+// AT_DISPATCH_ALL_TYPES(output.dtype(),
+//    AT_DISPATCH_ALL_TYPES(input1.dtype(),
+//       AT_DISPATCH_ALL_TYPES(input2.dtype(),
+//          [](arg0_t a, arg1_t b) -> out_t { return a + b; }
+//       )
+//    )
+// )
+//
+// If we support N dtypes, the above code would generate the a+b kernel for
+// all the N * N * N different supported types; the compilation time and
+// binary size would become horrible.
+//
+// Dynamic casting might sound like a bad idea in terms of performance.
+// Especially if you ever do it in a loop, you are going to do a billion tests.
+// But in practice it is not as bad as it might look:
+//
+// - on CPU, this is a branch that always has the same outcome, therefore
+//   hopefully the branch predictor could do the job pretty well
+// - on GPU, these branches will not diverge, so we could still have the same
+//   warp executing the same line of code
+// - most kernels, like `add`, are bandwidth bound; adding a few clock cycles
+//   to check an integer does not hurt the performance much because the ALUs
+//   would wait for load instructions anyway.
+//
+// For the discussion and benchmark, refer to:
+// - https://github.com/pytorch/pytorch/pull/28343
+// - https://github.com/pytorch/pytorch/pull/28344
+// - https://github.com/pytorch/pytorch/pull/28345
+//
+
+#ifdef C10_HOST_DEVICE
+#define ERROR_UNSUPPORTED_CAST CUDA_KERNEL_ASSERT(false);
+#else
+#define ERROR_UNSUPPORTED_CAST TORCH_CHECK(false, "Unexpected scalar type");
+#endif
+
+// Fetch a value with dynamic type src_type from ptr, and cast it to static
+// type dest_t.
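+//
+// A minimal usage sketch (illustrative only; assumes this header is
+// included):
+//
+//   float f = 1.5f;
+//   // read a value whose dtype is only known at runtime, widened to double
+//   double d = c10::fetch_and_cast<double>(c10::ScalarType::Float, &f);
+//
+//   double out = 0.0;
+//   // store an int into a buffer whose dtype is only known at runtime
+//   c10::cast_and_store<int>(c10::ScalarType::Double, &out, 42);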
+#define FETCH_AND_CAST_CASE(type, scalartype) \ + case ScalarType::scalartype: \ + return c10::convert(c10::load(ptr)); + +template +C10_HOST_DEVICE inline dest_t fetch_and_cast( + const ScalarType src_type, + const void* ptr) { + switch (src_type) { + AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(FETCH_AND_CAST_CASE) + FETCH_AND_CAST_CASE(uint16_t, UInt16) + FETCH_AND_CAST_CASE(uint32_t, UInt32) + FETCH_AND_CAST_CASE(uint64_t, UInt64) + default: + ERROR_UNSUPPORTED_CAST + } + return dest_t(0); // just to avoid compiler warning +} + +// Cast a value with static type src_t into dynamic dest_type, and store it to +// ptr. +#define CAST_AND_STORE_CASE(type, scalartype) \ + case ScalarType::scalartype: \ + *(type*)ptr = c10::convert(value); \ + return; +template +C10_HOST_DEVICE inline void cast_and_store( + const ScalarType dest_type, + void* ptr, + src_t value) { + switch (dest_type) { + AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(CAST_AND_STORE_CASE) + CAST_AND_STORE_CASE(uint16_t, UInt16) + CAST_AND_STORE_CASE(uint32_t, UInt32) + CAST_AND_STORE_CASE(uint64_t, UInt64) + default:; + } + ERROR_UNSUPPORTED_CAST +} + +#define DEFINE_UNCASTABLE(T, scalartype_) \ + template <> \ + C10_HOST_DEVICE inline T fetch_and_cast( \ + const ScalarType src_type, const void* ptr) { \ + CUDA_KERNEL_ASSERT(ScalarType::scalartype_ == src_type); \ + return c10::load(ptr); \ + } \ + template <> \ + C10_HOST_DEVICE inline void cast_and_store( \ + const ScalarType dest_type, void* ptr, T value) { \ + CUDA_KERNEL_ASSERT(ScalarType::scalartype_ == dest_type); \ + *(T*)ptr = value; \ + } + +AT_FORALL_QINT_TYPES(DEFINE_UNCASTABLE) + +#undef FETCH_AND_CAST_CASE +#undef CAST_AND_STORE_CASE +#undef DEFINE_UNCASTABLE +#undef ERROR_UNSUPPORTED_CAST + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/Event.h b/phivenv/Lib/site-packages/torch/include/c10/core/Event.h new file mode 100644 index 0000000000000000000000000000000000000000..dd509f24ed66cd2f77cacef3ffd9e5fe96be2976 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/Event.h @@ -0,0 +1,137 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace c10 { + +/** + * A backend-generic movable, not copyable, not thread-safe event. + * + * The design of this event follows that of CUDA and HIP events. These events + * are recorded and waited on by streams and can be rerecorded to, + * each rerecording essentially creating a new version of the event. + * For example, if (in CPU time), stream X is asked to record E, + * stream Y waits on E, and stream X is asked to record E again, then Y will + * wait for X to finish the first call to record and not the second, because + * it's waiting on the first version of event E, not the second. + * Querying an event only returns the status of its most recent version. + * + * Backend-generic events are implemented by this class and + * impl::InlineEvent. In addition to these events there are also + * some backend-specific events, like ATen's CUDAEvent. Each of these + * classes has its own use. + * + * impl::InlineEvent<...> or a backend-specific event should be + * preferred when the backend is known at compile time and known to + * be compiled. Backend-specific events may have additional functionality. + * + * This Event should be used if a particular backend may not be available, + * or the backend required is not known at compile time. + * + * These generic events are built on top of DeviceGuardImpls, analogous + * to DeviceGuard and InlineDeviceGuard. 
The name "DeviceGuardImpls," + * is no longer entirely accurate, as these classes implement the + * backend-specific logic for a generic backend interface. + * + * See DeviceGuardImplInterface.h for a list of all supported flags. + */ + +struct Event final { + // Constructors + Event() = delete; + Event( + const DeviceType _device_type, + const EventFlag _flag = EventFlag::PYTORCH_DEFAULT) + : impl_{_device_type, _flag} {} + + // Copy constructor and copy assignment operator (deleted) + Event(const Event&) = delete; + Event& operator=(const Event&) = delete; + + // Move constructor and move assignment operator + Event(Event&&) noexcept = default; + Event& operator=(Event&&) noexcept = default; + + // Destructor + ~Event() = default; + + // Getters + Device device() const noexcept { + return Device(device_type(), device_index()); + } + DeviceType device_type() const noexcept { + return impl_.device_type(); + } + DeviceIndex device_index() const noexcept { + return impl_.device_index(); + } + EventFlag flag() const noexcept { + return impl_.flag(); + } + bool was_marked_for_recording() const noexcept { + return impl_.was_marked_for_recording(); + } + + /** + * Calls record() if and only if record() has never been called for this + * event. Note: because Event is not thread-safe recordOnce() may call + * record() multiple times if called from multiple threads. + */ + void recordOnce(const Stream& stream) { + impl_.recordOnce(stream); + } + + /** + * Increments the event's version and enqueues a job with this version + * in the stream's work queue. When the stream process that job + * it notifies all streams waiting on / blocked by that version of the + * event to continue and marks that version as recorded. + * */ + void record(const Stream& stream) { + impl_.record(stream); + } + + /** + * Does nothing if the event has not been scheduled to be recorded. + * If the event was previously enqueued to be recorded, a command + * to wait for the version of the event that exists at the time of this call + * is inserted in the stream's work queue. + * When the stream reaches this command it will stop processing + * additional commands until that version of the event is marked as recorded. + */ + void block(const Stream& stream) const { + impl_.block(stream); + } + + /** + * Returns true if (and only if) + * (1) the event has never been scheduled to be recorded + * (2) the current version is marked as recorded. + * Returns false otherwise. + */ + bool query() const { + return impl_.query(); + } + + double elapsedTime(const Event& event) const { + return impl_.elapsedTime(event.impl_); + } + + void* eventId() const { + return impl_.eventId(); + } + + void synchronize() const { + return impl_.synchronize(); + } + + private: + impl::InlineEvent impl_; +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/GeneratorImpl.h b/phivenv/Lib/site-packages/torch/include/c10/core/GeneratorImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..5d377e9475d74c05dec14ff499cea1b24346fdf2 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/GeneratorImpl.h @@ -0,0 +1,111 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +/** + * Note [Generator] + * ~~~~~~~~~~~~~~~~ + * A Pseudo Random Number Generator (PRNG) is an engine that uses an algorithm + * to generate a seemingly random sequence of numbers, that may be later be used + * in creating a random distribution. 
Such an engine almost always maintains a + * state and requires a seed to start off the creation of random numbers. Often + * times, users have found it beneficial to be able to explicitly create, + * retain, and destroy PRNG states and also be able to have control over the + * seed value. + * + * A Generator in ATen gives users the ability to read, write and modify a PRNG + * engine. For instance, it does so by letting users seed a PRNG engine, fork + * the state of the engine, etc. + * + * By default, there is one generator per device, and a device's generator is + * lazily created. A user can use the torch.Generator() api to create their own + * generator. Currently torch.Generator() can only create a CPUGeneratorImpl. + */ + +/** + * Note [Acquire lock when using random generators] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * Generator and its derived classes are NOT thread-safe. Please note that most + * of the places where we have inserted locking for generators are historically + * based, and we haven't actually checked that everything is truly thread safe + * (and it probably isn't). Please use the public mutex_ when using any methods + * from these classes, except for the read-only methods. You can learn about the + * usage by looking into the unittests (aten/src/ATen/cpu_generator_test.cpp) + * and other places where we have used lock_guard. + * + * TODO: Look into changing the threading semantics of Generators in ATen (e.g., + * making them non-thread safe and instead making the generator state + * splittable, to accommodate forks into other threads). + */ + +namespace c10 { + +// The default seed is selected to be a large number +// with good distribution of 0s and 1s in bit representation +constexpr uint64_t default_rng_seed_val = 67280421310721; + +struct C10_API GeneratorImpl : public c10::intrusive_ptr_target { + // Constructors + GeneratorImpl(Device device_in, DispatchKeySet key_set); + + // Delete all copy and move assignment in favor of clone() + // method + GeneratorImpl(const GeneratorImpl& other) = delete; + GeneratorImpl(GeneratorImpl&& other) = delete; + GeneratorImpl& operator=(const GeneratorImpl& other) = delete; + GeneratorImpl& operator=(GeneratorImpl&& other) = delete; + + ~GeneratorImpl() override = default; + c10::intrusive_ptr clone() const; + + // Common methods for all generators + virtual void set_current_seed(uint64_t seed) = 0; + virtual void set_offset(uint64_t offset) = 0; + virtual uint64_t get_offset() const = 0; + virtual uint64_t current_seed() const = 0; + virtual uint64_t seed() = 0; + virtual void set_state(const c10::TensorImpl& new_state) = 0; + virtual c10::intrusive_ptr get_state() const = 0; + virtual void graphsafe_set_state( + const c10::intrusive_ptr& new_state); + virtual c10::intrusive_ptr graphsafe_get_state() const; + Device device() const; + + // See Note [Acquire lock when using random generators] + std::mutex mutex_; + + DispatchKeySet key_set() const { + return key_set_; + } + + inline void set_pyobj(PyObject* pyobj) noexcept { + pyobj_ = pyobj; + } + + inline PyObject* pyobj() const noexcept { + return pyobj_; + } + + protected: + Device device_; + DispatchKeySet key_set_; + PyObject* pyobj_ = nullptr; + + virtual GeneratorImpl* clone_impl() const = 0; +}; + +namespace detail { + +C10_API uint64_t getNonDeterministicRandom(bool is_cuda = false); + +} // namespace detail + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/GradMode.h b/phivenv/Lib/site-packages/torch/include/c10/core/GradMode.h 
new file mode 100644 index 0000000000000000000000000000000000000000..4fbfce67bfe1537f963a76c3ccdd6d35c201ab8c --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/GradMode.h @@ -0,0 +1,52 @@ +#pragma once + +#include +#include + +namespace c10 { + +struct C10_API GradMode { + static bool is_enabled(); + static void set_enabled(bool enabled); +}; + +// A RAII, thread local (!) guard that enables or disables grad mode upon +// construction, and sets it back to the original value upon destruction. +struct C10_API AutoGradMode { + AutoGradMode(bool enabled) : prev_mode(GradMode::is_enabled()) { + GradMode::set_enabled(enabled); + } + AutoGradMode(const AutoGradMode&) = delete; + AutoGradMode(AutoGradMode&&) = delete; + AutoGradMode& operator=(const AutoGradMode&) = delete; + AutoGradMode& operator=(AutoGradMode&&) = delete; + ~AutoGradMode() { + GradMode::set_enabled(prev_mode); + } + bool prev_mode; +}; + +// A RAII, thread local (!) guard that stops future operations from building +// gradients. +struct C10_API NoGradGuard : public AutoGradMode { + NoGradGuard() : AutoGradMode(/*enabled=*/false) {} +}; + +// A RAII, thread local (!) guard that enables or disables forward grad mode +// upon construction, and sets it back to the original value upon destruction. +struct C10_API AutoFwGradMode { + AutoFwGradMode(bool enabled) + : prev_mode(AutogradState::get_tls_state().get_fw_grad_mode()) { + AutogradState::get_tls_state().set_fw_grad_mode(enabled); + } + AutoFwGradMode(const AutoFwGradMode&) = delete; + AutoFwGradMode(AutoFwGradMode&&) = delete; + AutoFwGradMode& operator=(const AutoFwGradMode&) = delete; + AutoFwGradMode& operator=(AutoFwGradMode&&) = delete; + ~AutoFwGradMode() { + AutogradState::get_tls_state().set_fw_grad_mode(prev_mode); + } + bool prev_mode; +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/InferenceMode.h b/phivenv/Lib/site-packages/torch/include/c10/core/InferenceMode.h new file mode 100644 index 0000000000000000000000000000000000000000..21282fd15f7409824ec607094887141de2c6a1aa --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/InferenceMode.h @@ -0,0 +1,91 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace c10 { + +// A RAII, thread local (!) guard that enables or disables inference mode upon +// construction, and sets it back to the original value upon destruction. +struct C10_API InferenceMode { + // Note [Expected TLS state in InferenceMode]: + // InferenceMode: ADInplaceOrView not in + // raw_local_dispatch_key_set.included(), + // Autograd in raw_local_dispatch_key_set.excluded() + // GradMode is disabled. + // NormalMode: ADInplaceOrView in raw_local_dispatch_key_set.included(), + // Autograd not in raw_local_dispatch_key_set.excluded() + // GradMode is enabled by default unless toggled manually + // through other APIs, e.g. NoGradGuard. + // + // Invariant: + // - ADInplaceOrView is never in the excluded set + // - Autograd is never in the included set + // - Setting InferenceMode will set GradMode accordingly, but not vice versa. + // + // 1. Why do we put ADInplaceOrView in included set outside InferenceMode? + // + // Inplace update to inference tensor outside InferenceMode is not + // allowed. See Note [Inplace update inference tensor] for more details. + // Without going through ADInplaceOrView kernel, we cannot throw error + // for `inference_tensor.add_(1)` case. + // + // 2. Why not put ADInplaceOrView in the excluded set inside InferenceMode? 
+ // + // For example: + // torch::Tensor a = torch::ones({1, 2, 3}).set_requires_grad(true); + // torch::Tensor k = a + 2; + // { + // c10::InferenceMode guard(true); + // k.add_(2); + // } + // `k.add_(2)` still need to go through ADInplaceOrView kernel so that it's + // prepared for future autograd. + // + // 3. Why does setting InferenceMode also set GradMode? + // + // This is required since InferenceMode is a faster and more restrictive + // version of NoGradGuard. All runtime checks using GradMode::is_enabled() + // are applicable to InferenceMode as well, e.g. + // `tensorTypeInCurrentExecutionContext` in interpreter.cpp. + InferenceMode(bool enabled = true) + : prev_mode(AutogradState::get_tls_state()), + prev_keyset(c10::impl::tls_local_dispatch_key_set()) { + // Enabling inference mode means disabling grad modes + // And disabling inference mode means enabling grad modes + AutogradState::set_tls_state(AutogradState( + /* grad_mode */ !enabled, + /* inference_mode */ enabled, + /* fw_grad_mode */ !enabled, + /* multithreading_enabled*/ !enabled)); + DispatchKeySet included = enabled + ? prev_keyset.included_.remove(c10::DispatchKey::ADInplaceOrView) + : prev_keyset.included_.add(c10::DispatchKey::ADInplaceOrView); + DispatchKeySet excluded = enabled + ? (prev_keyset.excluded_ | c10::autograd_dispatch_keyset) + : (prev_keyset.excluded_ - c10::autograd_dispatch_keyset); + c10::impl::PODLocalDispatchKeySet cur_keyset{}; + cur_keyset.set_included(included); + cur_keyset.set_excluded(excluded); + c10::impl::_force_tls_local_dispatch_key_set(cur_keyset); + } + + InferenceMode(const InferenceMode&) = delete; + InferenceMode(InferenceMode&&) = delete; + InferenceMode& operator=(const InferenceMode&) = delete; + InferenceMode& operator=(InferenceMode&&) = delete; + + ~InferenceMode() { + AutogradState::set_tls_state(prev_mode); + c10::impl::_force_tls_local_dispatch_key_set(prev_keyset); + } + static bool is_enabled(); + + private: + AutogradState prev_mode; + c10::impl::LocalDispatchKeySet prev_keyset; +}; +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/Layout.h b/phivenv/Lib/site-packages/torch/include/c10/core/Layout.h new file mode 100644 index 0000000000000000000000000000000000000000..3ec87697d18b902a5e3ec112911a5f4c615de678 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/Layout.h @@ -0,0 +1,78 @@ +#pragma once + +#include +#include + +#include +#include + +namespace c10 { +enum class Layout : int8_t { + Strided, + Sparse, + SparseCsr, + Mkldnn, + SparseCsc, + SparseBsr, + SparseBsc, + Jagged, + NumOptions +}; + +constexpr auto kStrided = Layout::Strided; +constexpr auto kSparse = Layout::Sparse; +constexpr auto kSparseCsr = Layout::SparseCsr; +constexpr auto kMkldnn = Layout::Mkldnn; +constexpr auto kSparseCsc = Layout::SparseCsc; +constexpr auto kSparseBsr = Layout::SparseBsr; +constexpr auto kSparseBsc = Layout::SparseBsc; +constexpr auto kJagged = Layout::Jagged; + +inline Layout layout_from_backend(Backend backend) { + switch (backend) { + case Backend::SparseCPU: + case Backend::SparseCUDA: + case Backend::SparseHIP: + case Backend::SparseVE: + case Backend::SparseXPU: + case Backend::SparsePrivateUse1: + return Layout::Sparse; + case Backend::MkldnnCPU: + return Layout::Mkldnn; + case Backend::SparseCsrCPU: + case Backend::SparseCsrCUDA: + case Backend::SparseCsrHIP: + case Backend::SparseCsrVE: + case Backend::SparseCsrXPU: + TORCH_CHECK( + false, + "Cannot map Backend SparseCsr(CPU|CUDA|HIP|VE|XPU) to a unique layout."); 
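+    // (A SparseCsr backend serves all four compressed layouts -- CSR, CSC,
+    // BSR, and BSC -- so the layout cannot be recovered from the backend
+    // alone; it has to be read off the tensor itself.)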
+ default: + return Layout::Strided; + } +} + +inline std::ostream& operator<<(std::ostream& stream, at::Layout layout) { + switch (layout) { + case at::kStrided: + return stream << "Strided"; + case at::kSparse: + return stream << "Sparse"; + case at::kSparseCsr: + return stream << "SparseCsr"; + case at::kSparseCsc: + return stream << "SparseCsc"; + case at::kSparseBsr: + return stream << "SparseBsr"; + case at::kSparseBsc: + return stream << "SparseBsc"; + case at::kMkldnn: + return stream << "Mkldnn"; + case at::kJagged: + return stream << "Jagged"; + default: + TORCH_CHECK(false, "Unknown layout"); + } +} + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/MemoryFormat.h b/phivenv/Lib/site-packages/torch/include/c10/core/MemoryFormat.h new file mode 100644 index 0000000000000000000000000000000000000000..55d25bd24d35b5cc50afce435042bc85d4f7fb76 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/MemoryFormat.h @@ -0,0 +1,290 @@ +#pragma once + +#include +#include + +#include +#include +#include + +// Memory format is not the property of a Tensor. It is the way to tell an +// operator how the result should be organized in memory and nothing more. That +// means memory format should never be used as return value for any tensor state +// interrogation functions (internally and externally). +// +// Possible options are: +// Preserve: +// If any of the input tensors is in channels_last format, operator output +// should be in channels_last format +// +// Contiguous: +// Regardless of input tensors format, the output should be contiguous +// Tensor. +// +// ChannelsLast: +// Regardless of input tensors format, the output should be in channels_last +// format. + +namespace c10 { +enum class MemoryFormat : int8_t { + Contiguous, + Preserve, + ChannelsLast, + ChannelsLast3d, + NumOptions +}; + +// If you are seeing this, it means that this call site was not checked if +// the memory format could be preserved, and it was switched to old default +// behaviour of contiguous +#define LEGACY_CONTIGUOUS_MEMORY_FORMAT c10::get_contiguous_memory_format() + +inline MemoryFormat get_contiguous_memory_format() { + return MemoryFormat::Contiguous; +} + +inline std::ostream& operator<<( + std::ostream& stream, + at::MemoryFormat memory_format) { + switch (memory_format) { + case MemoryFormat::Preserve: + return stream << "Preserve"; + case MemoryFormat::Contiguous: + return stream << "Contiguous"; + case MemoryFormat::ChannelsLast: + return stream << "ChannelsLast"; + case MemoryFormat::ChannelsLast3d: + return stream << "ChannelsLast3d"; + default: + TORCH_CHECK(false, "Unknown memory format ", memory_format); + } +} + +// Note: Hardcoded the channel last stride indices here to get better +// performance +template +inline std::vector get_channels_last_strides_2d(ArrayRef sizes) { + std::vector strides(sizes.size()); + switch (sizes.size()) { + case 4: + strides[1] = 1; + strides[3] = sizes[1]; + strides[2] = strides[3] * sizes[3]; + strides[0] = strides[2] * sizes[2]; + return strides; + case 3: + strides[0] = 1; + strides[2] = sizes[0]; + strides[1] = strides[2] * sizes[2]; + return strides; + default: + TORCH_INTERNAL_ASSERT( + false, "ChannelsLast2d doesn't support size ", sizes.size()); + } +} + +inline std::vector get_channels_last_strides_2d(IntArrayRef sizes) { + return get_channels_last_strides_2d(sizes); +} + +template +std::vector get_channels_last_strides_3d(ArrayRef sizes) { + std::vector strides(sizes.size()); + switch (sizes.size()) { + case 5: 
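+      // sizes are [N, C, D, H, W]; channels-last-3d (NDHWC) puts C innermost
+      // (stride 1), then W, H, D, N from fastest- to slowest-varying: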
+ strides[1] = 1; + strides[4] = sizes[1]; + strides[3] = strides[4] * sizes[4]; + strides[2] = strides[3] * sizes[3]; + strides[0] = strides[2] * sizes[2]; + return strides; + case 4: + strides[0] = 1; + strides[3] = sizes[0]; + strides[2] = strides[3] * sizes[3]; + strides[1] = strides[2] * sizes[2]; + return strides; + default: + TORCH_INTERNAL_ASSERT( + false, "ChannelsLast3d doesn't support size ", sizes.size()); + } +} + +inline std::vector get_channels_last_strides_3d(IntArrayRef sizes) { + return get_channels_last_strides_3d(sizes); +} + +// NOTE: +// Below are Helper functions for is_channels_last_strides_xd. +// 1. Please do not combine these helper functions, each helper function handles +// exactly one case of sizes + memory_format, by doing this, the strides indices +// will be a constant array and we can access it using constant index number, +// the compiler will fully unroll the loop on strides indices to gain a better +// performance. +// 2. No error check in helper function, caller ensures the correctness of the +// input +// 3. All helper functions have similar comments, only 1st helper function is +// commented here. +template +inline bool is_channels_last_strides_2d_s4( + const ArrayRef sizes, + const ArrayRef strides) { + T min = 0; + // special case for trivial C dimension. default to NCHW + if (strides[1] == 0) { + return false; + } + // loop strides indices + for (auto& d : {1, 3, 2, 0}) { + if (sizes[d] == 0) { + return false; + } + if (strides[d] < min) { + return false; + } + // Fallback to NCHW as default layout for ambiguous cases + // This is the flaw of implicit memory_format from strides. + // N111 tensor with identical strides for size 1 dimension; + // Two cases could lead us here: + // a. N111 contiguous Tensor ([N,1,1,1]@[1,1,1,1]) + // b. N11W contiguous Tensor sliced on the W-dimension. + // ([N,1,1,1]@[W,W,W,W]) + if (d == 0 && min == strides[1]) { + return false; + } + // This is necessary to: + // 1. distinguish the memory_format of N1H1; + // [H, 1, 1, 1] channels_last stride + // [H, H, 1, 1] contiguous stride + // 2. permutation of 1C1W: + // [1, C, 1, H]@[HC, H, H, 1] transpose(1, 3) + // [1, H, 1, C]@[HC, 1, H, H] shouldn't be identified as channels_last + min = strides[d]; + if (sizes[d] > 1) { + min *= sizes[d]; + } + } + return true; +} + +template +inline bool is_channels_last_strides_3d_s5( + const ArrayRef sizes, + const ArrayRef strides) { + T min = 0; + if (strides[1] == 0) { + return false; + } + for (auto& d : {1, 4, 3, 2, 0}) { + if (sizes[d] == 0) { + return false; + } + if (strides[d] < min) { + return false; + } + if (d == 0 && min == strides[1]) { + return false; + } + min = strides[d]; + if (sizes[d] > 1) { + min *= sizes[d]; + } + } + return true; +} + +// Note [Ambiguous is_channels_last_strides_xd] +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +// The flaw of carrying memory_format implicitly through strides is very hard +// to WAR properly. issue #24090 +// Without the history of permutation, we can't infer the memory_format of a +// tensor from the snapshot of its size & stride +// e.g. +// +// 1. We can NOT specify the memory_format of N111 tensor through strides in a +// meaningful way; +// +// 2. 
Two paths that end up with identical size/stride:
+// N11W contiguous tensor sliced at the W-dimension becomes [N,1,1,1]@[W,W,W,W]
+// NC11 channels_last tensor sliced at the C-dimension becomes [N,1,1,1]@[C,C,C,C]
+// So if we see a tensor [N,1,1,1]@[X,X,X,X], there is no way for us to infer
+// the memory_format of the original tensor.
+//
+// Given these limitations, our temporary WAR `is_channels_last_strides` makes
+// a best effort to infer whether the original memory_format of a tensor is
+// at::MemoryFormat::ChannelsLast. The two objectives of this function (ordered
+// by their importance):
+// 1. Ensure that normal shape manipulation does not accidentally change the
+// MemoryFormat of an existing tensor.
+// 2. Allow users to mark MemoryFormat::ChannelsLast on tensors.
+//
+// The function does so by checking the strides of the tensor, including the
+// strides of size-1 dimensions, even though conventionally PyTorch places no
+// restriction on trivial strides (the stride of a size-1 dimension).
+//
+// Note that this approach is a compromise; it does not solve the problem
+// completely. In many cases we will not be able to infer the correct memory
+// format.
+// The implementation of `is_channels_last_strides` serves the objectives:
+// MemoryFormat::ChannelsLast has to be explicitly opted into (no accidental
+// conversion), with a best effort to maintain the ChannelsLast flag.
+//
+// Because this is not a bulletproof solution, through testing
+// (aten/src/ATen/test/memory_format_test.cpp)
+// a. we ensure that the common cases are supported;
+// b. we identify the corner cases where the implementation compromises.
+//
+// By the time accumulated permutation is enabled to replace implicit
+// memory_format through strides, we should update our tests and fix the
+// issues they surface.
+//
+// We used Channels Last 2d as the example above, but this is a general
+// problem for all the is_channels_last_strides_xd implementations. Please
+// check the helper functions (is_channels_last_strides_*d_s*) for more
+// details.
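+//
+// A concrete sketch of what the helpers below report (values only, assuming
+// the int64_t overloads):
+//
+//   int64_t sizes[4] = {2, 1, 1, 4};   // [N=2, C=1, H=1, W=4]
+//   int64_t contig[4] = {4, 4, 4, 1};  // contiguous NCHW strides
+//   int64_t cl[4] = {4, 1, 4, 1};      // channels_last strides, same sizes
+//
+//   is_channels_last_strides_2d<int64_t>(sizes, contig); // false
+//   is_channels_last_strides_2d<int64_t>(sizes, cl);     // true
+//
+// whereas a [2, 1, 1, 8] contiguous tensor sliced down to W=4 keeps strides
+// [8, 8, 8, 1] and is (correctly, per objective 1) still reported as false.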
+ +template +inline bool is_channels_last_strides_2d( + const ArrayRef sizes, + const ArrayRef strides) { + switch (sizes.size()) { + case 4: + return is_channels_last_strides_2d_s4(sizes, strides); + // NOLINTNEXTLINE(bugprone-branch-clone) + case 3: + // TODO dim == 3 case will be enabled once it is fully tested + return false; + default: + return false; + } +} + +template +inline bool is_channels_last_strides_3d( + const ArrayRef sizes, + const ArrayRef strides) { + switch (sizes.size()) { + case 5: + return is_channels_last_strides_3d_s5(sizes, strides); + // NOLINTNEXTLINE(bugprone-branch-clone) + case 4: + // TODO dim == 4 case will be enabled once it is fully tested + return false; + default: + return false; + } +} + +inline bool is_channels_last_strides_2d( + const IntArrayRef sizes, + const IntArrayRef strides) { + return is_channels_last_strides_2d(sizes, strides); +} + +inline bool is_channels_last_strides_3d( + const IntArrayRef sizes, + const IntArrayRef strides) { + return is_channels_last_strides_3d(sizes, strides); +} + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/OptionalRef.h b/phivenv/Lib/site-packages/torch/include/c10/core/OptionalRef.h new file mode 100644 index 0000000000000000000000000000000000000000..84c843ec68164c1dcf01990a0319b25f8cd674ec --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/OptionalRef.h @@ -0,0 +1,31 @@ +#pragma once + +namespace c10 { + +template +class OptionalRef { + public: + OptionalRef() : data_(nullptr) {} + OptionalRef(const T* data) : data_(data) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(data_); + } + OptionalRef(const T& data) : data_(&data) {} + + bool has_value() const { + return data_ != nullptr; + } + + const T& get() const { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(data_); + return *data_; + } + + operator bool() const { + return has_value(); + } + + private: + const T* data_; +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/PyHandleCache.h b/phivenv/Lib/site-packages/torch/include/c10/core/PyHandleCache.h new file mode 100644 index 0000000000000000000000000000000000000000..37245dbed26c4afc2700089a6cf06a34d2a12d8c --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/PyHandleCache.h @@ -0,0 +1,76 @@ +#pragma once + +#include +#include +#include +#include + +#include + +namespace c10 { + +// A PyHandleCache represents a cached pointer from a C++ object to +// a Python object that represents that object analogously in Python. +// Upon a cache hit, the relevant object can be retrieved after a test +// and then a memory load. Two conditions must hold to be able to use this +// class: +// +// - This must truly be a cache; e.g., the caller must be able to produce +// the object some other way if the cache hit misses. +// +// - This must truly be a handle; e.g., the Python object referenced by +// this class must have static lifetime. This means we don't have to +// maintain strong ownership or deallocate the object when the C++ object +// dies. Static lifetime is a good idea in conjunction with the cache, +// since if you are producing a fresh object on miss you won't be +// maintaining object identity. If you need bidirectional ownership, +// you will want to factor out the pattern in TensorImpl with +// resurrection. +// +// This cache is expected to not improve perf under torchdeploy, as one +// interpreter will fill up the cache, and all the interpreters will be +// unable to use the slot. 
A potential improvement is to have multiple +// slots (one per interpreter), which will work in deployment scenarios +// where there a stable, fixed number of interpreters. You can also store +// the relevant state in the Python library, rather than in the non-Python +// library (although in many cases, this is not convenient, as there may +// not be a way to conveniently index based on the object.) +class PyHandleCache { + public: + PyHandleCache() : pyinterpreter_(nullptr) {} + + // Attempt to fetch the pointer from the cache, if the PyInterpreter + // matches. If it doesn't exist, or the cache entry is not valid, + // use slow_accessor to get the real pointer value and return that + // (possibly writing it to the cache, if the cache entry is + // available.) + template + PyObject* ptr_or(impl::PyInterpreter* self_interpreter, F slow_accessor) + const { + // Note [Memory ordering on Python interpreter tag] + impl::PyInterpreter* interpreter = + pyinterpreter_.load(std::memory_order_acquire); + if (C10_LIKELY(interpreter == self_interpreter)) { + return data_; + } else if (interpreter == nullptr) { + auto* r = slow_accessor(); + impl::PyInterpreter* expected = nullptr; + // attempt to claim this cache entry with the specified interpreter tag + if (pyinterpreter_.compare_exchange_strong( + expected, self_interpreter, std::memory_order_acq_rel)) { + data_ = r; + } + // This shouldn't be possible, as you should be GIL protected + TORCH_INTERNAL_ASSERT(expected != self_interpreter); + return r; + } else { + return slow_accessor(); + } + } + + private: + mutable std::atomic pyinterpreter_; + mutable PyObject* data_{nullptr}; +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/QEngine.h b/phivenv/Lib/site-packages/torch/include/c10/core/QEngine.h new file mode 100644 index 0000000000000000000000000000000000000000..b8a0ac9639303e3ce466db30c53e02739ef68224 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/QEngine.h @@ -0,0 +1,46 @@ +#pragma once + +#include +#include +#include + +namespace c10 { + +/** + * QEngine is an enum that is used to select the engine to run quantized ops. + * Keep this enum in sync with get_qengine_id() in + * torch/backends/quantized/__init__.py + */ +enum class QEngine : uint8_t { + NoQEngine = 0, + FBGEMM = 1, + QNNPACK = 2, + ONEDNN = 3, + X86 = 4, +}; + +constexpr auto kNoQEngine = QEngine::NoQEngine; +constexpr auto kFBGEMM = QEngine::FBGEMM; +constexpr auto kQNNPACK = QEngine::QNNPACK; +constexpr auto kONEDNN = QEngine::ONEDNN; +constexpr auto kX86 = QEngine::X86; + +inline std::string toString(QEngine qengine) { + switch (qengine) { + case kNoQEngine: + return "NoQEngine"; + case kFBGEMM: + return "FBGEMM"; + case kQNNPACK: + return "QNNPACK"; + case kONEDNN: + return "ONEDNN"; + case kX86: + return "X86"; + default: + TORCH_CHECK( + false, "Unrecognized Quantized Engine: ", static_cast(qengine)); + } +} + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/QScheme.h b/phivenv/Lib/site-packages/torch/include/c10/core/QScheme.h new file mode 100644 index 0000000000000000000000000000000000000000..158839257eabe869fdb2c04e7754a938fe2eacd8 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/QScheme.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include +#include + +namespace c10 { + +/** + * QScheme is an enum that specifies the type of quantization. 
This has a one + * to one correspondence with Quantizer + * Please refer to ATen/quantized/Quantizer.h to see the Quantizers classes. + * Keep this file in sync with torch/nn/_qscheme.py + */ +enum class QScheme : uint8_t { + PER_TENSOR_AFFINE = 0, + PER_CHANNEL_AFFINE = 1, + PER_TENSOR_SYMMETRIC = 2, + PER_CHANNEL_SYMMETRIC = 3, + PER_CHANNEL_AFFINE_FLOAT_QPARAMS = 4, + COMPILE_TIME_NUM_QSCHEMES = 5, +}; + +constexpr auto kPerTensorAffine = QScheme::PER_TENSOR_AFFINE; +constexpr auto kPerChannelAffine = QScheme::PER_CHANNEL_AFFINE; +constexpr auto kPerTensorSymmetric = QScheme::PER_TENSOR_SYMMETRIC; +constexpr auto kPerChannelSymmetric = QScheme::PER_CHANNEL_SYMMETRIC; +constexpr auto kPerChannelAffineFloatQParams = + QScheme::PER_CHANNEL_AFFINE_FLOAT_QPARAMS; +constexpr int COMPILE_TIME_NUM_QSCHEMES = + static_cast(QScheme::COMPILE_TIME_NUM_QSCHEMES); + +inline std::string toString(QScheme qscheme) { + switch (qscheme) { + case kPerTensorAffine: + return "per_tensor_affine"; + case kPerChannelAffine: + return "per_channel_affine"; + case kPerTensorSymmetric: + return "per_tensor_symmetric"; + case kPerChannelSymmetric: + return "per_channel_symmetric"; + case kPerChannelAffineFloatQParams: + return "per_channel_affine_float_qparams"; + default: + TORCH_CHECK(false, "Unrecognized qscheme: ", static_cast(qscheme)); + } +} + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/RefcountedDeleter.h b/phivenv/Lib/site-packages/torch/include/c10/core/RefcountedDeleter.h new file mode 100644 index 0000000000000000000000000000000000000000..d4cd755f86d3723d07804f28cbfe1d9c6ad3dce3 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/RefcountedDeleter.h @@ -0,0 +1,52 @@ +#pragma once + +#include +#include +#include + +#include +#include + +namespace c10 { + +// A RefcountedDeleterContext object is used as the `ctx` argument for DataPtr +// to implement a shared DataPtr. Normally, a DataPtr is unique, but we use +// this custom context and the `refcounted_deleter` function below to make the +// DataPtr act like a non-unique DataPtr. This context object holds onto an +// inner context and deleter function which handle the actual deletion of the +// data when the refcount reaches 0. +// +// This shared DataPtr feature is only used when storages are shared between +// multiple Python interpreters in MultiPy. // codespell:ignore multipy +// Before storages had PyObject preservation, interpreters could just share the +// same StorageImpl instance. But now a StorageImpl can only be associated with +// one interpreter in order to properly manage a zombie PyObject. So we share +// storages across Python interpreters by creating a different StorageImpl +// instance for each one, but they all point to the same data. +struct C10_API RefcountedDeleterContext { + RefcountedDeleterContext(void* other_ctx, c10::DeleterFnPtr other_deleter) + : other_ctx(other_ctx, other_deleter), refcount(1) {} + + std::unique_ptr other_ctx; + std::atomic_int refcount; +}; + +// `refcounted_deleter` is used as the `ctx_deleter` for DataPtr to implement +// a shared DataPtr. 
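+// Each StorageImpl sharing the data bumps `refcount` in the
+// RefcountedDeleterContext; `refcounted_deleter` decrements it and, once it
+// reaches zero, runs the inner deleter held in `other_ctx` to free the data.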
+// +// Warning: This should only be called on a pointer to +// a RefcountedDeleterContext that was allocated on the heap with `new`, +// because when the refcount reaches 0, the context is deleted with `delete` +C10_API void refcounted_deleter(void* ctx_); + +// If the storage's DataPtr does not use `refcounted_deleter`, replace it with +// a DataPtr that does, so it can be shared between multiple StorageImpls +C10_API void maybeApplyRefcountedDeleter(const c10::Storage& storage); + +// Create a new StorageImpl that points to the same data. If the original +// StorageImpl's DataPtr does not use `refcounted_deleter`, it will be replaced +// with one that does +C10_API c10::Storage newStorageImplFromRefcountedDataPtr( + const c10::Storage& storage); + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/SafePyObject.h b/phivenv/Lib/site-packages/torch/include/c10/core/SafePyObject.h new file mode 100644 index 0000000000000000000000000000000000000000..a53e8bcbd7ce15995871cf146d24684fcf8c20b0 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/SafePyObject.h @@ -0,0 +1,120 @@ +#pragma once + +#include +#include +#include +#include + +namespace c10 { + +// This is an safe owning holder for a PyObject, akin to pybind11's +// py::object, with two major differences: +// +// - It is in c10/core; i.e., you can use this type in contexts where +// you do not have a libpython dependency +// +// - It is multi-interpreter safe (ala torchdeploy); when you fetch +// the underlying PyObject* you are required to specify what the current +// interpreter context is and we will check that you match it. +// +// It is INVALID to store a reference to a Tensor object in this way; +// you should just use TensorImpl directly in that case! +struct C10_API SafePyObject { + // Steals a reference to data + SafePyObject(PyObject* data, c10::impl::PyInterpreter* pyinterpreter) + : data_(data), pyinterpreter_(pyinterpreter) {} + SafePyObject(SafePyObject&& other) noexcept + : data_(std::exchange(other.data_, nullptr)), + pyinterpreter_(other.pyinterpreter_) {} + // For now it's not used, so we just disallow it. + SafePyObject& operator=(SafePyObject&&) = delete; + + SafePyObject(SafePyObject const& other) + : data_(other.data_), pyinterpreter_(other.pyinterpreter_) { + if (data_ != nullptr) { + (*pyinterpreter_)->incref(data_); + } + } + + SafePyObject& operator=(SafePyObject const& other) { + if (this == &other) { + return *this; // Handle self-assignment + } + if (other.data_ != nullptr) { + (*other.pyinterpreter_)->incref(other.data_); + } + if (data_ != nullptr) { + (*pyinterpreter_)->decref(data_, /*has_pyobj_slot*/ false); + } + data_ = other.data_; + pyinterpreter_ = other.pyinterpreter_; + return *this; + } + + ~SafePyObject() { + if (data_ != nullptr) { + (*pyinterpreter_)->decref(data_, /*has_pyobj_slot*/ false); + } + } + + c10::impl::PyInterpreter& pyinterpreter() const { + return *pyinterpreter_; + } + PyObject* ptr(const c10::impl::PyInterpreter*) const; + + // stop tracking the current object, and return it + PyObject* release() { + auto rv = data_; + data_ = nullptr; + return rv; + } + + private: + PyObject* data_; + c10::impl::PyInterpreter* pyinterpreter_; +}; + +// A newtype wrapper around SafePyObject for type safety when a python object +// represents a specific type. Note that `T` is only used as a tag and isn't +// actually used for any true purpose. 
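+// (Illustrative only: given tag declarations `struct GradModeTag;` and
+// `struct VmapTag;`, a SafePyObjectT<GradModeTag> cannot be passed where a
+// SafePyObjectT<VmapTag> is expected, even though both hold a plain
+// PyObject* at runtime.)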
+template +struct SafePyObjectT : private SafePyObject { + SafePyObjectT(PyObject* data, c10::impl::PyInterpreter* pyinterpreter) + : SafePyObject(data, pyinterpreter) {} + ~SafePyObjectT() = default; + SafePyObjectT(SafePyObjectT&& other) noexcept : SafePyObject(other) {} + SafePyObjectT(SafePyObjectT const&) = delete; + SafePyObjectT& operator=(SafePyObjectT const&) = delete; + SafePyObjectT& operator=(SafePyObjectT&&) = delete; + + using SafePyObject::ptr; + using SafePyObject::pyinterpreter; + using SafePyObject::release; +}; + +// Like SafePyObject, but non-owning. Good for references to global PyObjects +// that will be leaked on interpreter exit. You get a copy constructor/assign +// this way. +struct C10_API SafePyHandle { + SafePyHandle() : data_(nullptr), pyinterpreter_(nullptr) {} + SafePyHandle(PyObject* data, c10::impl::PyInterpreter* pyinterpreter) + : data_(data), pyinterpreter_(pyinterpreter) {} + + c10::impl::PyInterpreter& pyinterpreter() const { + return *pyinterpreter_; + } + PyObject* ptr(const c10::impl::PyInterpreter*) const; + void reset() { + data_ = nullptr; + pyinterpreter_ = nullptr; + } + operator bool() { + return data_; + } + + private: + PyObject* data_; + c10::impl::PyInterpreter* pyinterpreter_; +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/Scalar.h b/phivenv/Lib/site-packages/torch/include/c10/core/Scalar.h new file mode 100644 index 0000000000000000000000000000000000000000..2ad11634821bd763da3189602d04247c5443c3b2 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/Scalar.h @@ -0,0 +1,460 @@ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace c10 { + +/** + * Scalar represents a 0-dimensional tensor which contains a single element. + * Unlike a tensor, numeric literals (in C++) are implicitly convertible to + * Scalar (which is why, for example, we provide both add(Tensor) and + * add(Scalar) overloads for many operations). It may also be used in + * circumstances where you statically know a tensor is 0-dim and single size, + * but don't know its type. 
+ */ +class C10_API Scalar { + public: + Scalar() : Scalar(int64_t(0)) {} + + void destroy() { + if (Tag::HAS_si == tag || Tag::HAS_sd == tag || Tag::HAS_sb == tag) { + raw::intrusive_ptr::decref(v.p); + v.p = nullptr; + } + } + + ~Scalar() { + destroy(); + } + +#define DEFINE_IMPLICIT_CTOR(type, name) \ + Scalar(type vv) : Scalar(vv, true) {} + + AT_FORALL_SCALAR_TYPES_AND3(Half, BFloat16, ComplexHalf, DEFINE_IMPLICIT_CTOR) + AT_FORALL_COMPLEX_TYPES(DEFINE_IMPLICIT_CTOR) + AT_FORALL_FLOAT8_TYPES(DEFINE_IMPLICIT_CTOR) + + // Helper constructors to allow Scalar creation from long and long long types + // As std::is_same_v is false(except Android), one needs to + // provide a constructor from either long or long long in addition to one from + // int64_t +#if defined(__APPLE__) || defined(__MACOSX) + static_assert( + std::is_same_v, + "int64_t is the same as long long on MacOS"); + Scalar(long vv) : Scalar(vv, true) {} +#endif +#if defined(_MSC_VER) + static_assert( + std::is_same_v, + "int64_t is the same as long long on Windows"); + Scalar(long vv) : Scalar(vv, true) {} +#endif +#if defined(__linux__) && !defined(__ANDROID__) + static_assert( + sizeof(void*) != 8 || std::is_same_v, + "int64_t is the same as long on 64 bit Linux"); +#if LONG_MAX != INT_MAX + Scalar(long long vv) : Scalar(vv, true) {} +#endif /* not 32-bit system */ +#endif + + Scalar(uint16_t vv) : Scalar(vv, true) {} + Scalar(uint32_t vv) : Scalar(vv, true) {} + Scalar(uint64_t vv) { + if (vv > static_cast(INT64_MAX)) { + tag = Tag::HAS_u; + v.u = vv; + } else { + tag = Tag::HAS_i; + // NB: no need to use convert, we've already tested convertibility + v.i = static_cast(vv); + } + } + +#undef DEFINE_IMPLICIT_CTOR + + // Value* is both implicitly convertible to SymbolicVariable and bool which + // causes ambiguity error. Specialized constructor for bool resolves this + // problem. 
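+  // (So `Scalar(true)` tags the payload as HAS_b instead of promoting the
+  // bool to int64_t: `Scalar(true).isBoolean()` is true, while
+  // `Scalar(1).isBoolean()` is false.)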
+ template < + typename T, + typename std::enable_if_t, bool>* = nullptr> + Scalar(T vv) : tag(Tag::HAS_b) { + v.i = convert(vv); + } + + template < + typename T, + typename std::enable_if_t, bool>* = + nullptr> + Scalar(T vv) : tag(Tag::HAS_sb) { + v.i = convert(vv); + } + +#define DEFINE_ACCESSOR(type, name) \ + type to##name() const { \ + if (Tag::HAS_d == tag) { \ + return checked_convert(v.d, #type); \ + } else if (Tag::HAS_z == tag) { \ + return checked_convert>(v.z, #type); \ + } else if (Tag::HAS_sd == tag) { \ + return checked_convert( \ + toSymFloat().guard_float(__FILE__, __LINE__), #type); \ + } \ + if (Tag::HAS_b == tag) { \ + return checked_convert(v.i, #type); \ + } else if (Tag::HAS_i == tag) { \ + return checked_convert(v.i, #type); \ + } else if (Tag::HAS_u == tag) { \ + return checked_convert(v.u, #type); \ + } else if (Tag::HAS_si == tag) { \ + return checked_convert( \ + toSymInt().guard_int(__FILE__, __LINE__), #type); \ + } else if (Tag::HAS_sb == tag) { \ + return checked_convert( \ + toSymBool().guard_bool(__FILE__, __LINE__), #type); \ + } \ + TORCH_CHECK(false) \ + } + + // TODO: Support ComplexHalf accessor + AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(DEFINE_ACCESSOR) + DEFINE_ACCESSOR(uint16_t, UInt16) + DEFINE_ACCESSOR(uint32_t, UInt32) + DEFINE_ACCESSOR(uint64_t, UInt64) + +#undef DEFINE_ACCESSOR + + SymInt toSymInt() const { + if (Tag::HAS_si == tag) { + return c10::SymInt(intrusive_ptr::reclaim_copy( + static_cast(v.p))); + } else { + return toLong(); + } + } + + SymFloat toSymFloat() const { + if (Tag::HAS_sd == tag) { + return c10::SymFloat(intrusive_ptr::reclaim_copy( + static_cast(v.p))); + } else { + return toDouble(); + } + } + + SymBool toSymBool() const { + if (Tag::HAS_sb == tag) { + return c10::SymBool(intrusive_ptr::reclaim_copy( + static_cast(v.p))); + } else { + return toBool(); + } + } + + // also support scalar.to(); + // Deleted for unsupported types, but specialized below for supported types + template + T to() const = delete; + + // audit uses of data_ptr + const void* data_ptr() const { + TORCH_INTERNAL_ASSERT(!isSymbolic()); + return static_cast(&v); + } + + bool isFloatingPoint() const { + return Tag::HAS_d == tag || Tag::HAS_sd == tag; + } + + [[deprecated( + "isIntegral is deprecated. 
Please use the overload with 'includeBool' parameter instead.")]] bool + isIntegral() const { + return Tag::HAS_i == tag || Tag::HAS_si == tag || Tag::HAS_u == tag; + } + bool isIntegral(bool includeBool) const { + return Tag::HAS_i == tag || Tag::HAS_si == tag || Tag::HAS_u == tag || + (includeBool && isBoolean()); + } + + bool isComplex() const { + return Tag::HAS_z == tag; + } + bool isBoolean() const { + return Tag::HAS_b == tag || Tag::HAS_sb == tag; + } + + // you probably don't actually want these; they're mostly for testing + bool isSymInt() const { + return Tag::HAS_si == tag; + } + bool isSymFloat() const { + return Tag::HAS_sd == tag; + } + bool isSymBool() const { + return Tag::HAS_sb == tag; + } + + bool isSymbolic() const { + return Tag::HAS_si == tag || Tag::HAS_sd == tag || Tag::HAS_sb == tag; + } + + C10_ALWAYS_INLINE Scalar& operator=(Scalar&& other) noexcept { + if (&other == this) { + return *this; + } + + destroy(); + moveFrom(std::move(other)); + return *this; + } + + C10_ALWAYS_INLINE Scalar& operator=(const Scalar& other) { + if (&other == this) { + return *this; + } + + *this = Scalar(other); + return *this; + } + + Scalar operator-() const; + Scalar conj() const; + Scalar log() const; + + template < + typename T, + typename std::enable_if_t::value, int> = 0> + bool equal(T num) const { + if (isComplex()) { + TORCH_INTERNAL_ASSERT(!isSymbolic()); + auto val = v.z; + return (val.real() == num) && (val.imag() == T()); + } else if (isFloatingPoint()) { + return toDouble() == num; + } else if (tag == Tag::HAS_i) { + if (overflows(v.i, /* strict_unsigned */ true)) { + return false; + } else { + return static_cast(v.i) == num; + } + } else if (tag == Tag::HAS_u) { + if (overflows(v.u, /* strict_unsigned */ true)) { + return false; + } else { + return static_cast(v.u) == num; + } + } else if (tag == Tag::HAS_si) { + TORCH_INTERNAL_ASSERT(false, "NYI SymInt equality"); + } else if (isBoolean()) { + // boolean scalar does not equal to a non boolean value + TORCH_INTERNAL_ASSERT(!isSymbolic()); + return false; + } else { + TORCH_INTERNAL_ASSERT(false); + } + } + + template < + typename T, + typename std::enable_if_t::value, int> = 0> + bool equal(T num) const { + if (isComplex()) { + TORCH_INTERNAL_ASSERT(!isSymbolic()); + return v.z == num; + } else if (isFloatingPoint()) { + return (toDouble() == num.real()) && (num.imag() == T()); + } else if (tag == Tag::HAS_i) { + if (overflows(v.i, /* strict_unsigned */ true)) { + return false; + } else { + return static_cast(v.i) == num.real() && num.imag() == T(); + } + } else if (tag == Tag::HAS_u) { + if (overflows(v.u, /* strict_unsigned */ true)) { + return false; + } else { + return static_cast(v.u) == num.real() && num.imag() == T(); + } + } else if (tag == Tag::HAS_si) { + TORCH_INTERNAL_ASSERT(false, "NYI SymInt equality"); + } else if (isBoolean()) { + // boolean scalar does not equal to a non boolean value + TORCH_INTERNAL_ASSERT(!isSymbolic()); + return false; + } else { + TORCH_INTERNAL_ASSERT(false); + } + } + + bool equal(bool num) const { + if (isBoolean()) { + TORCH_INTERNAL_ASSERT(!isSymbolic()); + return static_cast(v.i) == num; + } else { + return false; + } + } + + ScalarType type() const { + if (isComplex()) { + return ScalarType::ComplexDouble; + } else if (isFloatingPoint()) { + return ScalarType::Double; + } else if (isIntegral(/*includeBool=*/false)) { + // Represent all integers as long, UNLESS it is unsigned and therefore + // unrepresentable as long + if (Tag::HAS_u == tag) { + return ScalarType::UInt64; + 
+      }
+      return ScalarType::Long;
+    } else if (isBoolean()) {
+      return ScalarType::Bool;
+    } else {
+      throw std::runtime_error("Unknown scalar type.");
+    }
+  }
+
+  Scalar(Scalar&& rhs) noexcept : tag(rhs.tag) {
+    moveFrom(std::move(rhs));
+  }
+
+  Scalar(const Scalar& rhs) : tag(rhs.tag), v(rhs.v) {
+    if (isSymbolic()) {
+      c10::raw::intrusive_ptr::incref(v.p);
+    }
+  }
+
+  Scalar(c10::SymInt si) {
+    if (auto m = si.maybe_as_int()) {
+      tag = Tag::HAS_i;
+      v.i = *m;
+    } else {
+      tag = Tag::HAS_si;
+      v.p = std::move(si).release();
+    }
+  }
+
+  Scalar(c10::SymFloat sd) {
+    if (sd.is_symbolic()) {
+      tag = Tag::HAS_sd;
+      v.p = std::move(sd).release();
+    } else {
+      tag = Tag::HAS_d;
+      v.d = sd.as_float_unchecked();
+    }
+  }
+
+  Scalar(c10::SymBool sb) {
+    if (auto m = sb.maybe_as_bool()) {
+      tag = Tag::HAS_b;
+      v.i = *m;
+    } else {
+      tag = Tag::HAS_sb;
+      v.p = std::move(sb).release();
+    }
+  }
+
+  // We can't set v in the initializer list using the
+  // syntax v{ .member = ... } because it doesn't work on MSVC
+
+ private:
+  enum class Tag { HAS_d, HAS_i, HAS_u, HAS_z, HAS_b, HAS_sd, HAS_si, HAS_sb };
+
+  // Note [Meaning of HAS_u]
+  // ~~~~~~~~~~~~~~~~~~~~~~~
+  // HAS_u is a bit special. On its face, it just means that we
+  // are holding an unsigned integer. However, we generally don't
+  // distinguish between different bit sizes in Scalar (e.g., we represent
+  // float as double); instead, a tag represents a mathematical notion
+  // of some quantity (integral versus floating point). So actually,
+  // HAS_u is used solely to represent unsigned integers that could
+  // not be represented as a signed integer. That means only uint64_t
+  // can potentially get this tag; smaller types like uint8_t fit into a
+  // regular int, and so for BC reasons we keep them as int.
+
+  // NB: assumes that self has already been cleared
+  // NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved)
+  C10_ALWAYS_INLINE void moveFrom(Scalar&& rhs) noexcept {
+    v = rhs.v;
+    tag = rhs.tag;
+    if (rhs.tag == Tag::HAS_si || rhs.tag == Tag::HAS_sd ||
+        rhs.tag == Tag::HAS_sb) {
+      // Move out of scalar
+      rhs.tag = Tag::HAS_i;
+      rhs.v.i = 0;
+    }
+  }
+
+  Tag tag;
+
+  union v_t {
+    double d{};
+    int64_t i;
+    // See Note [Meaning of HAS_u]
+    uint64_t u;
+    c10::complex<double> z;
+    c10::intrusive_ptr_target* p;
+    // NOLINTNEXTLINE(modernize-use-equals-default)
+    v_t() {} // default constructor
+  } v;
+
+  template <
+      typename T,
+      typename std::enable_if_t<
+          std::is_integral_v<T> && !std::is_same_v<T, bool>,
+          bool>* = nullptr>
+  Scalar(T vv, bool) : tag(Tag::HAS_i) {
+    v.i = convert<decltype(v.i), T>(vv);
+  }
+
+  template <
+      typename T,
+      typename std::enable_if_t<
+          !std::is_integral_v<T> && !c10::is_complex<T>::value,
+          bool>* = nullptr>
+  Scalar(T vv, bool) : tag(Tag::HAS_d) {
+    v.d = convert<decltype(v.d), T>(vv);
+  }
+
+  template <
+      typename T,
+      typename std::enable_if_t<c10::is_complex<T>::value, bool>* = nullptr>
+  Scalar(T vv, bool) : tag(Tag::HAS_z) {
+    v.z = convert<decltype(v.z), T>(vv);
+  }
+};
+
+using OptionalScalarRef = c10::OptionalRef<Scalar>;
+
+// define the scalar.to<T>() specializations
+#define DEFINE_TO(T, name)         \
+  template <>                      \
+  inline T Scalar::to<T>() const { \
+    return to##name();             \
+  }
+AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(DEFINE_TO)
+DEFINE_TO(uint16_t, UInt16)
+DEFINE_TO(uint32_t, UInt32)
+DEFINE_TO(uint64_t, UInt64)
+#undef DEFINE_TO
+
+} // namespace c10
diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/ScalarType.h b/phivenv/Lib/site-packages/torch/include/c10/core/ScalarType.h
new file mode 100644
index 0000000000000000000000000000000000000000..5ed028878e8a186d82edf7c5defeaa4ccc85fce1
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/c10/core/ScalarType.h
@@ -0,0 +1,612 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace c10 {
+
+// dummy struct for uint1 to uint7, actual functionality
+// of these dtypes will be implemented in python with Tensor subclass
+template <unsigned int N>
+struct dummy_uint1_7_t {};
+
+// dummy struct for int1 to int7, actual functionality
+// of these dtypes will be implemented in python with Tensor subclass
+template <unsigned int N>
+struct dummy_int1_7_t {};
+
+// For the macros below:
+//
+// For users: If you want to macro some code for all non-QInt scalar types
+// (i.e. types with complete information), you probably want one of the
+// AT_FORALL_SCALAR_TYPES / AT_FORALL_SCALAR_TYPES_AND macros below, which are
+// designed to behave similarly to the Dispatch macros with the same name.
+//
+// For adding a new dtype: In the beginning, we had an idea that there was a
+// list of all scalar types, and you could use AT_FORALL_SCALAR_TYPES to
+// iterate over them. But over the years we added weird types which couldn't
+// be handled uniformly everywhere, and so in the end we ended up with a
+// mish-mash of helper macros, with most use sites making their own call about
+// which dtypes they can or can't support. So if you want to add a new dtype,
+// the preferred resolution is to find a dtype similar to what you want,
+// grep for it, and edit all the sites you find that way. If you need to add
+// a completely new kind of dtype, you're going to have to laboriously audit
+// all of the sites everywhere to figure out how it should work. Consulting
+// some old PRs where we added new dtypes (check the history of this file) can
+// help give you an idea where to start.
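+
+// A minimal sketch of how these X-macro lists are consumed (PRINT_SIZE is a
+// hypothetical caller-supplied macro; anything that takes a (ctype, name)
+// pair can be plugged in the same way):
+//
+//   #define PRINT_SIZE(ctype, name) \
+//     std::cout << #name << ": " << sizeof(ctype) << '\n';
+//   AT_FORALL_SCALAR_TYPES(PRINT_SIZE)
+//   #undef PRINT_SIZE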
+
+// NB: Order matters for this macro; it is relied upon in
+// _promoteTypesLookup and the serialization format.
+#define AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_AND_QINTS(_) \
+  _(uint8_t, Byte) /* 0 */                               \
+  _(int8_t, Char) /* 1 */                                \
+  _(int16_t, Short) /* 2 */                              \
+  _(int, Int) /* 3 */                                    \
+  _(int64_t, Long) /* 4 */                               \
+  _(at::Half, Half) /* 5 */                              \
+  _(float, Float) /* 6 */                                \
+  _(double, Double) /* 7 */                              \
+  _(c10::complex<c10::Half>, ComplexHalf) /* 8 */        \
+  _(c10::complex<float>, ComplexFloat) /* 9 */           \
+  _(c10::complex<double>, ComplexDouble) /* 10 */        \
+  _(bool, Bool) /* 11 */                                 \
+  _(c10::qint8, QInt8) /* 12 */                          \
+  _(c10::quint8, QUInt8) /* 13 */                        \
+  _(c10::qint32, QInt32) /* 14 */                        \
+  _(at::BFloat16, BFloat16) /* 15 */                     \
+  _(c10::quint4x2, QUInt4x2) /* 16 */                    \
+  _(c10::quint2x4, QUInt2x4) /* 17 */                    \
+  _(c10::bits1x8, Bits1x8) /* 18 */                      \
+  _(c10::bits2x4, Bits2x4) /* 19 */                      \
+  _(c10::bits4x2, Bits4x2) /* 20 */                      \
+  _(c10::bits8, Bits8) /* 21 */                          \
+  _(c10::bits16, Bits16) /* 22 */                        \
+  _(c10::Float8_e5m2, Float8_e5m2) /* 23 */              \
+  _(c10::Float8_e4m3fn, Float8_e4m3fn) /* 24 */          \
+  _(c10::Float8_e5m2fnuz, Float8_e5m2fnuz) /* 25 */      \
+  _(c10::Float8_e4m3fnuz, Float8_e4m3fnuz) /* 26 */      \
+  _(uint16_t, UInt16) /* 27 */                           \
+  _(uint32_t, UInt32) /* 28 */                           \
+  _(uint64_t, UInt64) /* 29 */                           \
+  _(c10::dummy_uint1_7_t<1>, UInt1) /* 30 */             \
+  _(c10::dummy_uint1_7_t<2>, UInt2) /* 31 */             \
+  _(c10::dummy_uint1_7_t<3>, UInt3) /* 32 */             \
+  _(c10::dummy_uint1_7_t<4>, UInt4) /* 33 */             \
+  _(c10::dummy_uint1_7_t<5>, UInt5) /* 34 */             \
+  _(c10::dummy_uint1_7_t<6>, UInt6) /* 35 */             \
+  _(c10::dummy_uint1_7_t<7>, UInt7) /* 36 */             \
+  _(c10::dummy_int1_7_t<1>, Int1) /* 37 */               \
+  _(c10::dummy_int1_7_t<2>, Int2) /* 38 */               \
+  _(c10::dummy_int1_7_t<3>, Int3) /* 39 */               \
+  _(c10::dummy_int1_7_t<4>, Int4) /* 40 */               \
+  _(c10::dummy_int1_7_t<5>, Int5) /* 41 */               \
+  _(c10::dummy_int1_7_t<6>, Int6) /* 42 */               \
+  _(c10::dummy_int1_7_t<7>, Int7) /* 43 */               \
+  _(c10::Float8_e8m0fnu, Float8_e8m0fnu) /* 44 */        \
+  _(c10::Float4_e2m1fn_x2, Float4_e2m1fn_x2) /* 45 */
+
+// If you want to support ComplexHalf for real, add ComplexHalf
+// into this macro (and change the name). But beware: convert()
+// doesn't work for all the conversions you need...
+//
+// TODO: To add unsigned int types here, we must define accumulate type.
+// But uint8 currently accumulates into int64, so we would have to make
+// an inconsistent choice for the larger types. Difficult.
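+
+// Since ScalarType (defined below) is generated from the rows above in
+// order, the /* n */ annotations can be checked mechanically, e.g. (sketch):
+//
+//   static_assert(static_cast<int8_t>(ScalarType::Byte) == 0);
+//   static_assert(static_cast<int8_t>(ScalarType::UInt64) == 29);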
+#define AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_EXCEPT_COMPLEX_HALF_F8NZ(_) \
+  _(uint8_t, Byte)                                                      \
+  _(int8_t, Char)                                                       \
+  _(int16_t, Short)                                                     \
+  _(int, Int)                                                           \
+  _(int64_t, Long)                                                      \
+  _(at::Half, Half)                                                     \
+  _(float, Float)                                                       \
+  _(double, Double)                                                     \
+  _(c10::complex<float>, ComplexFloat)                                  \
+  _(c10::complex<double>, ComplexDouble)                                \
+  _(bool, Bool)                                                         \
+  _(at::BFloat16, BFloat16)                                             \
+  _(at::Float8_e5m2, Float8_e5m2)                                       \
+  _(at::Float8_e4m3fn, Float8_e4m3fn)
+
+// This macro controls many of our C++ APIs, including constructors
+// for Scalar as well as the data() and item() accessors on Tensor
+#define AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(_) \
+  _(uint8_t, Byte)                             \
+  _(int8_t, Char)                              \
+  _(int16_t, Short)                            \
+  _(int, Int)                                  \
+  _(int64_t, Long)                             \
+  _(at::Half, Half)                            \
+  _(float, Float)                              \
+  _(double, Double)                            \
+  _(c10::complex<c10::Half>, ComplexHalf)      \
+  _(c10::complex<float>, ComplexFloat)         \
+  _(c10::complex<double>, ComplexDouble)       \
+  _(bool, Bool)                                \
+  _(at::BFloat16, BFloat16)                    \
+  _(at::Float8_e5m2, Float8_e5m2)              \
+  _(at::Float8_e4m3fn, Float8_e4m3fn)          \
+  _(at::Float8_e5m2fnuz, Float8_e5m2fnuz)      \
+  _(at::Float8_e4m3fnuz, Float8_e4m3fnuz)      \
+  _(at::Float8_e8m0fnu, Float8_e8m0fnu)
+
+enum class ScalarType : int8_t {
+#define DEFINE_ST_ENUM_VAL_(_1, n) n,
+  AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_AND_QINTS(DEFINE_ST_ENUM_VAL_)
+#undef DEFINE_ST_ENUM_VAL_
+  Undefined,
+  NumOptions
+};
+
+constexpr uint16_t NumScalarTypes =
+    static_cast<uint16_t>(ScalarType::NumOptions);
+
+namespace impl {
+
+// These are used to map ScalarTypes to C++ types.
+
+template <c10::ScalarType N>
+struct ScalarTypeToCPPType;
+
+#define SPECIALIZE_ScalarTypeToCPPType(cpp_type, scalar_type)                \
+  template <>                                                                \
+  struct ScalarTypeToCPPType<c10::ScalarType::scalar_type> {                 \
+    using type = cpp_type;                                                   \
+                                                                             \
+    /* This is a workaround for the CUDA bug which prevents */               \
+    /* ::detail::ScalarTypeToCType::type being used directly due to */       \
+    /* ambiguous reference which can't be resolved. For some reason it */    \
+    /* can't pick between at::detail and at::cuda::detail. */                \
+    /* For repro example, please see: */                                     \
+    /* https://gist.github.com/izdeby/952ae7cf256ddb740a73776d39a7e7ba */    \
+    /* TODO: remove once the bug is fixed. */                                \
+    static type t;                                                           \
+  };
+
+AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_AND_QINTS(SPECIALIZE_ScalarTypeToCPPType)
+
+#undef SPECIALIZE_ScalarTypeToCPPType
+
+template <c10::ScalarType T>
+using ScalarTypeToCPPTypeT = typename ScalarTypeToCPPType<T>::type;
+
+} // namespace impl
+
+template <typename T>
+struct CppTypeToScalarType;
+
+#define SPECIALIZE_CppTypeToScalarType(cpp_type, scalar_type)                  \
+  template <>                                                                  \
+  struct CppTypeToScalarType<cpp_type>                                         \
+      : std::                                                                  \
+            integral_constant<c10::ScalarType, c10::ScalarType::scalar_type> { \
+  };
+
+AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_AND_QINTS(SPECIALIZE_CppTypeToScalarType)
+
+#undef SPECIALIZE_CppTypeToScalarType
+
+// NB: despite its generic sounding name, the macros that don't take _AND
+// are mostly only used by tensorexpr
+#define AT_FORALL_INT_TYPES(_) \
+  _(uint8_t, Byte)             \
+  _(int8_t, Char)              \
+  _(int16_t, Short)            \
+  _(int, Int)                  \
+  _(int64_t, Long)
+
+#define AT_FORALL_SCALAR_TYPES(_) \
+  _(uint8_t, Byte)                \
+  _(int8_t, Char)                 \
+  _(int16_t, Short)               \
+  _(int, Int)                     \
+  _(int64_t, Long)                \
+  _(float, Float)                 \
+  _(double, Double)
+
+// These macros often control how many template instantiations we
+// create for kernels. It is typically inappropriate to add new dtypes here;
+// instead, new types should be added to use sites on a case-by-case basis.
+// We generally are not accepting new dtypes due to binary size concerns.
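+
+// Illustrative sketch of the _AND variants defined below:
+// AT_FORALL_SCALAR_TYPES_AND(Half, F) applies F to the seven default
+// (ctype, name) pairs plus (at::Half, Half). Counting expansions with a
+// hypothetical COUNT_ONE macro:
+//
+//   #define COUNT_ONE(ctype, name) +1
+//   constexpr int n = 0 AT_FORALL_SCALAR_TYPES_AND(Half, COUNT_ONE); // n == 8
+//   #undef COUNT_ONE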
+ +#define AT_FORALL_SCALAR_TYPES_AND(SCALARTYPE, _) \ + _(uint8_t, Byte) \ + _(int8_t, Char) \ + _(int16_t, Short) \ + _(int, Int) \ + _(int64_t, Long) \ + _(float, Float) \ + _(double, Double) \ + _(decltype(::c10::impl::ScalarTypeToCPPType< \ + ::c10::ScalarType::SCALARTYPE>::t), \ + SCALARTYPE) + +#define AT_FORALL_SCALAR_TYPES_AND2(SCALARTYPE1, SCALARTYPE2, _) \ + _(uint8_t, Byte) \ + _(int8_t, Char) \ + _(int16_t, Short) \ + _(int, Int) \ + _(int64_t, Long) \ + _(float, Float) \ + _(double, Double) \ + _(decltype(::c10::impl::ScalarTypeToCPPType< \ + ::c10::ScalarType::SCALARTYPE1>::t), \ + SCALARTYPE1) \ + _(decltype(::c10::impl::ScalarTypeToCPPType< \ + ::c10::ScalarType::SCALARTYPE2>::t), \ + SCALARTYPE2) + +#define AT_FORALL_SCALAR_TYPES_AND3(SCALARTYPE1, SCALARTYPE2, SCALARTYPE3, _) \ + _(uint8_t, Byte) \ + _(int8_t, Char) \ + _(int16_t, Short) \ + _(int, Int) \ + _(int64_t, Long) \ + _(float, Float) \ + _(double, Double) \ + _(decltype(::c10::impl::ScalarTypeToCPPType< \ + ::c10::ScalarType::SCALARTYPE1>::t), \ + SCALARTYPE1) \ + _(decltype(::c10::impl::ScalarTypeToCPPType< \ + ::c10::ScalarType::SCALARTYPE2>::t), \ + SCALARTYPE2) \ + _(decltype(::c10::impl::ScalarTypeToCPPType< \ + ::c10::ScalarType::SCALARTYPE3>::t), \ + SCALARTYPE3) + +#define AT_FORALL_SCALAR_TYPES_AND7( \ + SCALARTYPE1, \ + SCALARTYPE2, \ + SCALARTYPE3, \ + SCALARTYPE4, \ + SCALARTYPE5, \ + SCALARTYPE6, \ + SCALARTYPE7, \ + _) \ + _(uint8_t, Byte) \ + _(int8_t, Char) \ + _(int16_t, Short) \ + _(int, Int) \ + _(int64_t, Long) \ + _(float, Float) \ + _(double, Double) \ + _(decltype(::c10::impl::ScalarTypeToCPPType< \ + ::c10::ScalarType::SCALARTYPE1>::t), \ + SCALARTYPE1) \ + _(decltype(::c10::impl::ScalarTypeToCPPType< \ + ::c10::ScalarType::SCALARTYPE2>::t), \ + SCALARTYPE2) \ + _(decltype(::c10::impl::ScalarTypeToCPPType< \ + ::c10::ScalarType::SCALARTYPE3>::t), \ + SCALARTYPE3) \ + _(decltype(::c10::impl::ScalarTypeToCPPType< \ + ::c10::ScalarType::SCALARTYPE4>::t), \ + SCALARTYPE4) \ + _(decltype(::c10::impl::ScalarTypeToCPPType< \ + ::c10::ScalarType::SCALARTYPE5>::t), \ + SCALARTYPE5) \ + _(decltype(::c10::impl::ScalarTypeToCPPType< \ + ::c10::ScalarType::SCALARTYPE6>::t), \ + SCALARTYPE6) \ + _(decltype(::c10::impl::ScalarTypeToCPPType< \ + ::c10::ScalarType::SCALARTYPE7>::t), \ + SCALARTYPE7) + +#define AT_FORALL_QINT_TYPES(_) \ + _(c10::qint8, QInt8) \ + _(c10::quint8, QUInt8) \ + _(c10::qint32, QInt32) \ + _(c10::quint4x2, QUInt4x2) \ + _(c10::quint2x4, QUInt2x4) + +#define AT_FORALL_FLOAT8_TYPES(_) \ + _(at::Float8_e5m2, Float8_e5m2) \ + _(at::Float8_e4m3fn, Float8_e4m3fn) \ + _(at::Float8_e5m2fnuz, Float8_e5m2fnuz) \ + _(at::Float8_e4m3fnuz, Float8_e4m3fnuz) \ + _(at::Float8_e8m0fnu, Float8_e8m0fnu) + +#define AT_FORALL_COMPLEX_TYPES(_) \ + _(c10::complex, ComplexFloat) \ + _(c10::complex, ComplexDouble) + +#define DEFINE_CONSTANT(_, name) \ + constexpr ScalarType k##name = ScalarType::name; + +// NOLINTNEXTLINE(clang-diagnostic-unused-const-variable) +AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_AND_QINTS(DEFINE_CONSTANT) +#undef DEFINE_CONSTANT + +inline const char* toString(ScalarType t) { +#define DEFINE_CASE(_, name) \ + case ScalarType::name: \ + return #name; + + switch (t) { + AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_AND_QINTS(DEFINE_CASE) + default: + return "UNKNOWN_SCALAR"; + } +#undef DEFINE_CASE +} + +inline size_t elementSize(ScalarType t) { +#define CASE_ELEMENTSIZE_CASE(ctype, name) \ + case ScalarType::name: \ + return sizeof(ctype); + + switch (t) { + 
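+    // For instance (sketch), CASE_ELEMENTSIZE_CASE(float, Float) expands to:
+    //
+    //   case ScalarType::Float:
+    //     return sizeof(float);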
AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_AND_QINTS(CASE_ELEMENTSIZE_CASE) + default: + TORCH_CHECK(false, "Unknown ScalarType"); + } +#undef CASE_ELEMENTSIZE_CASE +} + +inline bool isIntegralType(ScalarType t, bool includeBool) { + bool isIntegral = + (t == ScalarType::Byte || t == ScalarType::Char || t == ScalarType::Int || + t == ScalarType::Long || t == ScalarType::Short || + t == ScalarType::UInt16 || t == ScalarType::UInt32 || + t == ScalarType::UInt64); + + return isIntegral || (includeBool && t == ScalarType::Bool); +} + +[[deprecated( + "isIntegralType is deprecated. Please use the overload with 'includeBool' parameter instead.")]] inline bool +isIntegralType(ScalarType t) { + return isIntegralType(t, /*includeBool=*/false); +} + +inline bool isFloat8Type(ScalarType t) { + return t == ScalarType::Float8_e5m2 || t == ScalarType::Float8_e5m2fnuz || + t == ScalarType::Float8_e4m3fn || t == ScalarType::Float8_e4m3fnuz || + t == ScalarType::Float8_e8m0fnu; +} + +inline bool isReducedFloatingType(ScalarType t) { + return t == ScalarType::Half || t == ScalarType::BFloat16 || + isFloat8Type(t) || t == ScalarType::Float4_e2m1fn_x2; +} + +inline bool isFloatingType(ScalarType t) { + return t == ScalarType::Double || t == ScalarType::Float || + isReducedFloatingType(t); +} + +inline bool isComplexType(ScalarType t) { + return ( + t == ScalarType::ComplexHalf || t == ScalarType::ComplexFloat || + t == ScalarType::ComplexDouble); +} + +inline bool isQIntType(ScalarType t) { + // Don't forget to extend this when adding new QInt types + return t == ScalarType::QInt8 || t == ScalarType::QUInt8 || + t == ScalarType::QInt32 || t == ScalarType::QUInt4x2 || + t == ScalarType::QUInt2x4; +} + +inline bool isBitsType(ScalarType t) { + return t == ScalarType::Bits1x8 || t == ScalarType::Bits2x4 || + t == ScalarType::Bits4x2 || t == ScalarType::Bits8 || + t == ScalarType::Bits16; +} + +inline bool isBarebonesUnsignedType(ScalarType t) { + return t == ScalarType::UInt1 || t == ScalarType::UInt2 || + t == ScalarType::UInt3 || t == ScalarType::UInt4 || + t == ScalarType::UInt5 || t == ScalarType::UInt6 || + t == ScalarType::UInt7 || t == ScalarType::UInt16 || + t == ScalarType::UInt32 || t == ScalarType::UInt64; +} + +inline ScalarType toQIntType(ScalarType t) { + switch (t) { + case ScalarType::Byte: + return ScalarType::QUInt8; + case ScalarType::Char: + return ScalarType::QInt8; + case ScalarType::Int: + return ScalarType::QInt32; + default: + return t; + } +} + +inline ScalarType toUnderlying(ScalarType t) { + switch (t) { + case ScalarType::QUInt8: + case ScalarType::QUInt4x2: + [[fallthrough]]; + case ScalarType::QUInt2x4: + return ScalarType::Byte; + case ScalarType::QInt8: + return ScalarType::Char; + case ScalarType::QInt32: + return ScalarType::Int; + default: + return t; + } +} + +inline bool isSignedType(ScalarType t) { +#define CASE_ISSIGNED(name) \ + case ScalarType::name: \ + return std::numeric_limits< \ + ::c10::impl::ScalarTypeToCPPTypeT>::is_signed; + + // TODO(#146647): If we expect to have numeric_limits for everything, + // let's just have a big macro for the whole thing. + // If we're hardcoding it, let's just use the macro and a "true"/"false" + // below? 
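+  // For example (sketch), CASE_ISSIGNED(Int) expands to:
+  //
+  //   case ScalarType::Int:
+  //     return std::numeric_limits<
+  //         ::c10::impl::ScalarTypeToCPPTypeT<ScalarType::Int>>::is_signed;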
+ switch (t) { + case ScalarType::QInt8: + case ScalarType::QUInt8: + case ScalarType::QInt32: + case ScalarType::QUInt4x2: + case ScalarType::QUInt2x4: + TORCH_CHECK(false, "isSignedType not supported for quantized types"); + case ScalarType::Bits1x8: + case ScalarType::Bits2x4: + case ScalarType::Bits4x2: + case ScalarType::Bits8: + case ScalarType::Bits16: + TORCH_CHECK(false, "Bits types are undefined"); + CASE_ISSIGNED(UInt16); + CASE_ISSIGNED(UInt32); + CASE_ISSIGNED(UInt64); + CASE_ISSIGNED(BFloat16); + CASE_ISSIGNED(Float8_e5m2); + CASE_ISSIGNED(Float8_e5m2fnuz); + CASE_ISSIGNED(Float8_e4m3fn); + CASE_ISSIGNED(Float8_e4m3fnuz); + CASE_ISSIGNED(Float8_e8m0fnu); + CASE_ISSIGNED(Byte); + CASE_ISSIGNED(Char); + CASE_ISSIGNED(Short); + CASE_ISSIGNED(Int); + CASE_ISSIGNED(Long); + CASE_ISSIGNED(Half); + CASE_ISSIGNED(Float); + CASE_ISSIGNED(Double); + CASE_ISSIGNED(ComplexHalf); + CASE_ISSIGNED(ComplexFloat); + CASE_ISSIGNED(ComplexDouble); + CASE_ISSIGNED(Bool); + case ScalarType::Int1: + case ScalarType::Int2: + case ScalarType::Int3: + case ScalarType::Int4: + case ScalarType::Int5: + case ScalarType::Int6: + case ScalarType::Int7: + case ScalarType::Float4_e2m1fn_x2: + return true; + case ScalarType::UInt1: + case ScalarType::UInt2: + case ScalarType::UInt3: + case ScalarType::UInt4: + case ScalarType::UInt5: + case ScalarType::UInt6: + case ScalarType::UInt7: + return false; + case ScalarType::Undefined: + case ScalarType::NumOptions: + break; + // Do not add default here, but rather define behavior of every new entry + // here. `-Wswitch-enum` would raise a warning in those cases. + } + TORCH_CHECK(false, "Unknown ScalarType ", t); +#undef CASE_ISSIGNED +} + +inline bool isUnderlying(ScalarType type, ScalarType qtype) { + return type == toUnderlying(qtype); +} + +inline ScalarType toRealValueType(ScalarType t) { + switch (t) { + case ScalarType::ComplexHalf: + return ScalarType::Half; + case ScalarType::ComplexFloat: + return ScalarType::Float; + case ScalarType::ComplexDouble: + return ScalarType::Double; + default: + return t; + } +} + +inline ScalarType toComplexType(ScalarType t) { + switch (t) { + case ScalarType::BFloat16: + // BFloat16 has range equivalent to Float, + // so we map it to ComplexFloat. + return ScalarType::ComplexFloat; + case ScalarType::Half: + return ScalarType::ComplexHalf; + case ScalarType::Float: + return ScalarType::ComplexFloat; + case ScalarType::Double: + return ScalarType::ComplexDouble; + case ScalarType::ComplexHalf: + return ScalarType::ComplexHalf; + case ScalarType::ComplexFloat: + return ScalarType::ComplexFloat; + case ScalarType::ComplexDouble: + return ScalarType::ComplexDouble; + default: + TORCH_CHECK(false, "Unknown Complex ScalarType for ", t); + } +} + +// see tensor_attributes.rst for detailed explanation and examples +// of casting rules. +inline bool canCast(const ScalarType from, const ScalarType to) { + // We disallow complex -> non complex, e.g., float_tensor *= complex is + // disallowed. + if (isComplexType(from) && !isComplexType(to)) { + return false; + } + // We disallow float -> integral, e.g., int_tensor *= float is disallowed. + if (isFloatingType(from) && isIntegralType(to, false)) { + return false; + } + + // Treat bool as a distinct "category," to be consistent with type promotion + // rules (e.g. `bool_tensor + 5 -> int64_tensor`). If `5` was in the same + // category as `bool_tensor`, we would not promote. Differing categories + // implies `bool_tensor += 5` is disallowed. 
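+  //
+  // Concretely, the rules in this function give (sketch, using the k-aliases
+  // generated by DEFINE_CONSTANT above):
+  //
+  //   canCast(kComplexFloat, kFloat) == false  // complex -> non-complex
+  //   canCast(kFloat, kInt)          == false  // floating -> integral
+  //   canCast(kInt, kBool)           == false  // non-bool -> bool
+  //   canCast(kBool, kInt)           == true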
+ // + // NB: numpy distinguishes "unsigned" as a category to get the desired + // `bool_tensor + 5 -> int64_tensor` behavior. We don't, because: + // * We don't want the performance hit of checking the runtime sign of + // Scalars. + // * `uint8_tensor + 5 -> int64_tensor` would be undesirable. + if (from != ScalarType::Bool && to == ScalarType::Bool) { + return false; + } + return true; +} + +C10_API ScalarType promoteTypes(ScalarType a, ScalarType b); + +inline std::ostream& operator<<( + std::ostream& stream, + at::ScalarType scalar_type) { + return stream << toString(scalar_type); +} + +// Returns a pair of strings representing the names for each dtype. +// The returned pair is (name, legacy_name_if_applicable) +C10_API std::pair getDtypeNames( + c10::ScalarType scalarType); + +// Returns a map of string name to dtype. +C10_API const std::unordered_map& getStringToDtypeMap(); + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/ScalarTypeToTypeMeta.h b/phivenv/Lib/site-packages/torch/include/c10/core/ScalarTypeToTypeMeta.h new file mode 100644 index 0000000000000000000000000000000000000000..fdc4a25f5adc0ea1be2098737498e7e539d39d1b --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/ScalarTypeToTypeMeta.h @@ -0,0 +1,57 @@ +#pragma once + +#include +#include +#include + +// these just expose TypeMeta/ScalarType bridge functions in c10 +// TODO move to typeid.h (or codemod away) when TypeMeta et al +// are moved from caffe2 to c10 (see note at top of typeid.h) + +namespace c10 { + +/** + * convert ScalarType enum values to TypeMeta handles + */ +inline caffe2::TypeMeta scalarTypeToTypeMeta(ScalarType scalar_type) { + return caffe2::TypeMeta::fromScalarType(scalar_type); +} + +/** + * convert TypeMeta handles to ScalarType enum values + */ +inline ScalarType typeMetaToScalarType(caffe2::TypeMeta dtype) { + return dtype.toScalarType(); +} + +/** + * typeMetaToScalarType(), lifted to optional + */ +inline std::optional optTypeMetaToScalarType( + std::optional type_meta) { + if (!type_meta.has_value()) { + return std::nullopt; + } + return type_meta->toScalarType(); +} + +/** + * convenience: equality across TypeMeta/ScalarType conversion + */ +inline bool operator==(ScalarType t, caffe2::TypeMeta m) { + return m.isScalarType(t); +} + +inline bool operator==(caffe2::TypeMeta m, ScalarType t) { + return t == m; +} + +inline bool operator!=(ScalarType t, caffe2::TypeMeta m) { + return !(t == m); +} + +inline bool operator!=(caffe2::TypeMeta m, ScalarType t) { + return !(t == m); +} + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/Storage.h b/phivenv/Lib/site-packages/torch/include/c10/core/Storage.h new file mode 100644 index 0000000000000000000000000000000000000000..09be93941bb9e2a766a0784d96ee2a35dae8d099 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/Storage.h @@ -0,0 +1,272 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace c10 { + +struct Storage; + +C10_API bool isSharedStorageAlias( + const Storage& storage0, + const Storage& storage1); + +struct C10_API Storage { + public: + struct use_byte_size_t {}; + struct unsafe_borrow_t { + explicit unsafe_borrow_t() = default; + }; + + Storage() = default; + Storage(c10::intrusive_ptr ptr) + : storage_impl_(std::move(ptr)) {} + + // Allocates memory buffer using given allocator and creates a storage with it + Storage( + 
use_byte_size_t /*use_byte_size*/, + const SymInt& size_bytes, + Allocator* allocator = nullptr, + bool resizable = false) + : storage_impl_(c10::make_intrusive( + StorageImpl::use_byte_size_t(), + size_bytes, + allocator, + resizable)) {} + + // Creates storage with pre-allocated memory buffer. Allocator is given for + // potential future reallocations, however it can be nullptr if the storage + // is non-resizable + Storage( + use_byte_size_t /*use_byte_size*/, + size_t size_bytes, + at::DataPtr data_ptr, + at::Allocator* allocator = nullptr, + bool resizable = false) + : storage_impl_(c10::make_intrusive( + StorageImpl::use_byte_size_t(), + size_bytes, + std::move(data_ptr), + allocator, + resizable)) {} + + protected: + explicit Storage(unsafe_borrow_t, const Storage& rhs) + : storage_impl_(c10::intrusive_ptr::reclaim( + rhs.storage_impl_.get())) {} + + friend MaybeOwnedTraits; + + public: + // Legacy constructor for partially initialized (dtype or memory) storages + // that can be temporarily created with Caffe2 APIs. See the note on top of + // TensorImpl.h for details. + static Storage create_legacy(at::Device device) { + auto allocator = GetAllocator(device.type()); + return Storage(c10::make_intrusive( + StorageImpl::use_byte_size_t(), + 0, + allocator->allocate(0), // materialize a non-default Device. + allocator, + true)); + } + + // Mimic create_legacy, but without requiring a newly-created StorageImpl. + void reset_legacy() { + TORCH_CHECK(resizable() && allocator()); + set_nbytes(0); + set_data_ptr_noswap(allocator()->allocate(0)); + } + + // TODO: remove later + void set_nbytes(size_t size_bytes) const { + storage_impl_->set_nbytes(size_bytes); + } + + void set_nbytes(c10::SymInt size_bytes) const { + storage_impl_->set_nbytes(std::move(size_bytes)); + } + + bool resizable() const { + return storage_impl_->resizable(); + } + + size_t nbytes() const { + return storage_impl_->nbytes(); + } + + SymInt sym_nbytes() const { + return storage_impl_->sym_nbytes(); + } + // get() use here is to get const-correctness + + const void* data() const { + return storage_impl_->data(); + } + + void* mutable_data() const { + return storage_impl_->mutable_data(); + } + + at::DataPtr& mutable_data_ptr() const { + return storage_impl_->mutable_data_ptr(); + } + + const at::DataPtr& data_ptr() const { + return storage_impl_->data_ptr(); + } + + // Returns the previous data_ptr + at::DataPtr set_data_ptr(at::DataPtr&& data_ptr) const { + return storage_impl_->set_data_ptr(std::move(data_ptr)); + } + + void set_data_ptr_noswap(at::DataPtr&& data_ptr) const { + return storage_impl_->set_data_ptr_noswap(std::move(data_ptr)); + } + + DeviceType device_type() const { + return storage_impl_->device_type(); + } + + at::Allocator* allocator() const { + return storage_impl_->allocator(); + } + + at::Device device() const { + return storage_impl_->device(); + } + + StorageImpl* unsafeReleaseStorageImpl() { + return storage_impl_.release(); + } + + StorageImpl* unsafeGetStorageImpl() const noexcept { + return storage_impl_.get(); + } + + c10::weak_intrusive_ptr getWeakStorageImpl() const { + return c10::weak_intrusive_ptr(storage_impl_); + } + + operator bool() const { + return storage_impl_; + } + + size_t use_count() const { + return storage_impl_.use_count(); + } + + inline bool unique() const { + return storage_impl_.unique(); + } + + bool is_alias_of(const Storage& other) const { + return ( + storage_impl_ == other.storage_impl_ || + isSharedStorageAlias(*this, other)); + } + + void 
UniqueStorageShareExternalPointer( + void* src, + size_t capacity, + DeleterFnPtr d = nullptr) { + if (!storage_impl_.unique()) { + TORCH_CHECK( + false, + "UniqueStorageShareExternalPointer can only be called when use_count == 1"); + } + storage_impl_->UniqueStorageShareExternalPointer(src, capacity, d); + } + + void UniqueStorageShareExternalPointer( + at::DataPtr&& data_ptr, + size_t capacity) { + if (!storage_impl_.unique()) { + TORCH_CHECK( + false, + "UniqueStorageShareExternalPointer can only be called when use_count == 1"); + } + storage_impl_->UniqueStorageShareExternalPointer( + std::move(data_ptr), capacity); + } + + protected: + c10::intrusive_ptr storage_impl_; +}; + +template <> +struct MaybeOwnedTraits { + using owned_type = c10::Storage; + using borrow_type = c10::Storage; + + static borrow_type createBorrow(const owned_type& from) { + return borrow_type(borrow_type::unsafe_borrow_t{}, from); + } + + static void assignBorrow(borrow_type& lhs, const borrow_type& rhs) { + lhs.unsafeReleaseStorageImpl(); + lhs = borrow_type(borrow_type::unsafe_borrow_t{}, rhs); + } + + static void destroyBorrow(borrow_type& toDestroy) { + toDestroy.unsafeReleaseStorageImpl(); // "leak" it, but it was already +0. + } + + static const owned_type& referenceFromBorrow(const borrow_type& borrow) { + return borrow; + } + + static const owned_type* pointerFromBorrow(const borrow_type& borrow) { + return &borrow; + } + + static bool debugBorrowIsValid(const borrow_type& /*borrow*/) { + return true; + } +}; + +template <> +struct ExclusivelyOwnedTraits { + using repr_type = c10::Storage; + using pointer_type = c10::Storage*; + using const_pointer_type = const c10::Storage*; + + static repr_type nullRepr() { + return c10::Storage(); + } + + template + static repr_type createInPlace(Args&&... args) { + return c10::Storage(std::forward(args)...); + } + + static repr_type moveToRepr(c10::Storage&& x) { + return std::move(x); + } + + static c10::Storage take(c10::Storage& x) { + return std::move(x); + } + + static pointer_type getImpl(repr_type& x) { + return &x; + } + + static const_pointer_type getImpl(const repr_type& x) { + return &x; + } +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/StorageImpl.h b/phivenv/Lib/site-packages/torch/include/c10/core/StorageImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..8e2610ed6a68184df006ed0ed2510ef0e62c24a9 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/StorageImpl.h @@ -0,0 +1,373 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace c10 { + +[[noreturn]] C10_API void throwNullDataPtrError(); +C10_API void warnDeprecatedDataPtr(); + +// Used in StorageImpl to store extra metadata. +// Currently used only for storing a custom error message +// used when throwing an exception when data_ptr is accessed. +struct C10_API StorageExtraMeta { + std::optional custom_data_ptr_error_msg_ = std::nullopt; +}; + +// A storage represents the underlying backing data buffer for a +// tensor. This concept was inherited from the original Torch7 +// codebase; we'd kind of like to get rid of the concept +// (see https://github.com/pytorch/pytorch/issues/14797) but +// it's hard work and no one has gotten around to doing it. +// +// NB: storage is supposed to uniquely own a data pointer; e.g., +// two non-null data pointers alias if and only if they are from +// the same storage. 
Technically you can violate this invariant +// (e.g., you can create a non-owning StorageImpl with at::from_blob) +// but a lot of things won't work correctly, including: +// +// - An ordinary deleter on such a storage is wrong, because normal deleters +// assume unique ownership, but if you have two storages at the same data, +// that implies there is some sort of shared ownership. So your deleter would +// have to actually be internally doing some sort of refcount thing +// - Deepcopy in Python side relies on storage equality and not data pointer +// equality; so if there are two separate storages pointing to the same data, +// the data will actually get duplicated in that case (one data ptr before, +// two data ptrs after) +// - Version counts won't work correctly, because we do all VC tracking at the +// level of storages (unless you explicitly disconnect the VC with detach); +// mutation because data pointers are the same are totally untracked +struct C10_API StorageImpl : public c10::intrusive_ptr_target { + public: + struct use_byte_size_t {}; + + StorageImpl( + use_byte_size_t /*use_byte_size*/, + SymInt size_bytes, + at::DataPtr data_ptr, + at::Allocator* allocator, + bool resizable) + : data_ptr_(std::move(data_ptr)), + size_bytes_(std::move(size_bytes)), + size_bytes_is_heap_allocated_(size_bytes_.is_heap_allocated()), + resizable_(resizable), + received_cuda_(false), + allocator_(allocator) { + if (resizable) { + TORCH_INTERNAL_ASSERT( + allocator_, "For resizable storage, allocator must be provided"); + } + refresh_has_data_ptr_check(); + } + + StorageImpl( + use_byte_size_t /*use_byte_size*/, + const SymInt& size_bytes, + at::Allocator* allocator, + bool resizable) + : StorageImpl( + use_byte_size_t(), + size_bytes, + size_bytes.is_heap_allocated() + ? allocator->allocate(0) + : allocator->allocate(size_bytes.as_int_unchecked()), + allocator, + resizable) {} + + StorageImpl& operator=(StorageImpl&& other) = delete; + StorageImpl& operator=(const StorageImpl&) = delete; + StorageImpl() = delete; + StorageImpl(StorageImpl&& other) = delete; + StorageImpl(const StorageImpl&) = delete; + ~StorageImpl() override = default; + + void reset() { + data_ptr_.clear(); + size_bytes_ = 0; + size_bytes_is_heap_allocated_ = false; + } + + // Destructor doesn't call release_resources because it's + // unnecessary; don't forget to change that if needed! 
+ void release_resources() override { + data_ptr_.clear(); + } + + size_t nbytes() const { + // OK to do this instead of maybe_as_int as nbytes is guaranteed positive + TORCH_CHECK(!size_bytes_is_heap_allocated_); + return size_bytes_.as_int_unchecked(); + } + + SymInt sym_nbytes() const { + return size_bytes_; + } + + // TODO: remove later + void set_nbytes(size_t size_bytes) { + size_bytes_ = static_cast(size_bytes); + size_bytes_is_heap_allocated_ = false; + } + + void unsafe_set_nbytes(size_t size_bytes) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!size_bytes_is_heap_allocated_); + size_bytes_.unsafe_set_data(size_bytes); + } + + void set_nbytes(c10::SymInt size_bytes) { + size_bytes_ = std::move(size_bytes); + } + + bool resizable() const { + return resizable_; + } + + const at::DataPtr& data_ptr() const { + if (C10_UNLIKELY(throw_on_immutable_data_ptr_)) { + throw_data_ptr_access_error(); + } + return data_ptr_; + } + + at::DataPtr& mutable_data_ptr() { + if (C10_UNLIKELY(has_mutable_data_ptr_check_)) { + if (throw_on_immutable_data_ptr_) { + throw_data_ptr_access_error(); + } + if (throw_on_mutable_data_ptr_) { + throwNullDataPtrError(); + } + if (warn_deprecated_on_mutable_data_ptr_) { + warnDeprecatedDataPtr(); + } + maybe_materialize_cow(); + } + return data_ptr_; + } + + // Returns the data_ptr. Bypasses all checks. + at::DataPtr& _mutable_data_ptr_no_checks() { + return data_ptr_; + } + + // Returns the previous data_ptr + at::DataPtr set_data_ptr(at::DataPtr&& data_ptr) { + // We need to materialize the old COW DataPtr because it is + // being returned as mutable. + maybe_materialize_cow(); + return set_data_ptr_no_materialize_cow(std::move(data_ptr)); + } + + void set_data_ptr_noswap(at::DataPtr&& data_ptr) { + data_ptr_ = std::move(data_ptr); + refresh_has_data_ptr_check(); + } + + const void* data() const { + if (C10_UNLIKELY(throw_on_immutable_data_ptr_)) { + throw_data_ptr_access_error(); + } + return data_ptr_.get(); + } + + void* mutable_data() { + if (C10_UNLIKELY(has_mutable_data_ptr_check_)) { + if (throw_on_immutable_data_ptr_) { + throw_data_ptr_access_error(); + } + if (throw_on_mutable_data_ptr_) { + throwNullDataPtrError(); + } + if (warn_deprecated_on_mutable_data_ptr_) { + warnDeprecatedDataPtr(); + } + maybe_materialize_cow(); + } + return data_ptr_.mutable_get(); + } + + at::DeviceType device_type() const { + return data_ptr_.device().type(); + } + + at::Allocator* allocator() { + return allocator_; + } + + const at::Allocator* allocator() const { + return allocator_; + } + + // You generally shouldn't use this method, but it is occasionally + // useful if you want to override how a tensor will be reallocated, + // after it was already allocated (and its initial allocator was + // set) + void set_allocator(at::Allocator* allocator) { + allocator_ = allocator; + } + + Device device() const { + return data_ptr_.device(); + } + + void set_resizable(bool resizable) { + if (resizable) { + // We need an allocator to be resizable + AT_ASSERT(allocator_); + } + resizable_ = resizable; + } + + /** + * Can only be called when use_count is 1 + */ + void UniqueStorageShareExternalPointer( + void* src, + size_t size_bytes, + DeleterFnPtr d = nullptr) { + UniqueStorageShareExternalPointer( + at::DataPtr(src, src, d, data_ptr_.device()), size_bytes); + } + + /** + * Can only be called when use_count is 1 + */ + void UniqueStorageShareExternalPointer( + at::DataPtr&& data_ptr, + size_t size_bytes) { + data_ptr_ = std::move(data_ptr); + size_bytes_ = static_cast(size_bytes); + 
size_bytes_is_heap_allocated_ = false; + allocator_ = nullptr; + resizable_ = false; + } + + // This method can be used only after storage construction and cannot be used + // to modify storage status + void set_received_cuda(bool received_cuda) { + received_cuda_ = received_cuda; + } + + bool received_cuda() { + return received_cuda_; + } + + impl::PyObjectSlot* pyobj_slot() { + return &pyobj_slot_; + } + + const impl::PyObjectSlot* pyobj_slot() const { + return &pyobj_slot_; + } + + StorageExtraMeta& get_extra_meta() { + if (!extra_meta_) { + extra_meta_ = std::make_unique(); + } + return *extra_meta_; + } + + [[noreturn]] void throw_data_ptr_access_error() const; + + void release_data_and_set_meta_custom_data_ptr_error_msg_( + std::optional s) { + throw_on_immutable_data_ptr_ = true; + get_extra_meta().custom_data_ptr_error_msg_ = std::move(s); + refresh_has_data_ptr_check(); + } + + void set_throw_on_mutable_data_ptr() { + throw_on_mutable_data_ptr_ = true; + refresh_has_data_ptr_check(); + } + + void set_warn_deprecated_on_mutable_data_ptr() { + warn_deprecated_on_mutable_data_ptr_ = true; + refresh_has_data_ptr_check(); + } + + protected: + // materialize_cow_storage needs to call set_data_ptr_no_materlize_cow + friend void c10::impl::cow::materialize_cow_storage(StorageImpl& storage); + + // Returns the previous data_ptr. If the old data_ptr was COW, + // this avoids materializing it + at::DataPtr set_data_ptr_no_materialize_cow(at::DataPtr&& data_ptr) { + at::DataPtr old_data_ptr(std::move(data_ptr_)); + data_ptr_ = std::move(data_ptr); + refresh_has_data_ptr_check(); + return old_data_ptr; + } + + private: + void refresh_has_data_ptr_check() { + has_mutable_data_ptr_check_ = is_cow() || throw_on_mutable_data_ptr_ || + warn_deprecated_on_mutable_data_ptr_ || throw_on_immutable_data_ptr_; + } + + inline bool is_cow() const { + return c10::impl::cow::is_cow_data_ptr(data_ptr_); + } + + // Triggers a copy if this is a copy-on-write tensor. + void maybe_materialize_cow() { + if (is_cow()) { + impl::cow::materialize_cow_storage(*this); + } + } + + DataPtr data_ptr_; + SymInt size_bytes_; + bool size_bytes_is_heap_allocated_; + bool resizable_; + // Identifies that Storage was received from another process and doesn't have + // local to process cuda memory allocation + bool received_cuda_; + // All special checks in data/data_ptr calls are guarded behind this single + // boolean. This is for performance: .data/.data_ptr calls are commonly in the + // hot-path. + bool has_mutable_data_ptr_check_ = false; + // If we should throw when mutable_data_ptr() or mutable_data() is called. + bool throw_on_mutable_data_ptr_ = false; + // If we should throw when data_ptr() or data() is called. + bool throw_on_immutable_data_ptr_ = false; + // If we warn when mutable_data_ptr() or mutable_data() is called. + bool warn_deprecated_on_mutable_data_ptr_ = false; + Allocator* allocator_; + impl::PyObjectSlot pyobj_slot_; + std::unique_ptr extra_meta_ = nullptr; +}; + +// Declare StorageImpl create function pointer types. 
+using StorageImplCreateHelper = intrusive_ptr (*)( + StorageImpl::use_byte_size_t, + SymInt size_bytes, + DataPtr data_ptr, + Allocator* allocator, + bool resizable); + +C10_API void SetStorageImplCreate(DeviceType t, StorageImplCreateHelper fptr); + +C10_API StorageImplCreateHelper GetStorageImplCreate(DeviceType t); + +C10_API c10::intrusive_ptr make_storage_impl( + c10::StorageImpl::use_byte_size_t use_byte_size, + c10::SymInt size_bytes, + c10::DataPtr data_ptr, + c10::Allocator* allocator, + bool resizable, + std::optional device_opt); + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/Stream.h b/phivenv/Lib/site-packages/torch/include/c10/core/Stream.h new file mode 100644 index 0000000000000000000000000000000000000000..8defb338f4cb31e7b4f769722d66aa3b0ec1e46a --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/Stream.h @@ -0,0 +1,176 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace c10 { + +/// An index representing a specific stream. A StreamId is not independently +/// meaningful without knowing the Device it is associated with; try to +/// use Stream rather than StreamId directly. +/// +/// StreamIds are opaque; they are assigned by some DeviceType-specific +/// numbering system which is not visible to the user. HOWEVER, we +/// guarantee that StreamId 0 is always a valid stream, and corresponds +/// to some sort of "default" stream. +using StreamId = int64_t; + +struct C10_API StreamData3 { + StreamId stream_id; + DeviceIndex device_index; + DeviceType device_type; +}; + +// NB: I decided not to call the above StreamIndex to avoid confusion with +// DeviceIndex. This way, you access device index with index(), and stream id +// with id() + +/** + * A stream is a software mechanism used to synchronize launched kernels + * without requiring explicit synchronizations between kernels. The basic + * model is that every kernel launch is associated with a stream: every + * kernel on the same stream is implicitly synchronized so that if I launch + * kernels A and B on the same stream, A is guaranteed to finish before B + * launches. If I want B to run concurrently with A, I must schedule + * it on a different stream. + * + * The Stream class is a backend agnostic value class representing a stream + * which I may schedule a kernel on. Every stream is associated with a device, + * which is recorded in stream, which is used to avoid confusion about which + * device a stream refers to. + * + * Streams are explicitly thread-safe, in the sense that it is OK to pass + * a Stream from one thread to another, and kernels queued from two different + * threads will still get serialized appropriately. (Of course, the + * time when the kernels get queued is undetermined unless you synchronize + * host side ;) + * + * Stream does NOT have a default constructor. Streams are for expert + * users; if you want to use Streams, we're going to assume you know + * how to deal with C++ template error messages if you try to + * resize() a vector of Streams. + * + * Known instances of streams in backends: + * + * - cudaStream_t (CUDA) + * - hipStream_t (HIP) + * - cl_command_queue (OpenCL) (NB: Caffe2's existing OpenCL integration + * does NOT support command queues.) + * + * Because this class is device agnostic, it cannot provide backend-specific + * functionality (e.g., get the cudaStream_t of a CUDA stream.) There are + * wrapper classes which provide this functionality, e.g., CUDAStream. 
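+ *
+ * A minimal host-side sketch (assumes `s` was obtained from some backend
+ * stream API, since a backend-agnostic Stream cannot allocate streams):
+ *
+ *   void flush(c10::Stream s) {
+ *     if (!s.query()) {    // device work still pending on s?
+ *       s.synchronize();   // block this thread until it completes
+ *     }
+ *   }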
+ */ +class C10_API Stream final { + private: + Device device_; + StreamId id_; + + public: + enum Unsafe { UNSAFE }; + enum Default { DEFAULT }; + + /// Unsafely construct a stream from a Device and a StreamId. In + /// general, only specific implementations of streams for a + /// backend should manufacture Stream directly in this way; other users + /// should use the provided APIs to get a stream. In particular, + /// we don't require backends to give any guarantees about non-zero + /// StreamIds; they are welcome to allocate in whatever way they like. + explicit Stream(Unsafe, Device device, StreamId id) + : device_(device), id_(id) {} + + /// Construct the default stream of a Device. The default stream is + /// NOT the same as the current stream; default stream is a fixed stream + /// that never changes, whereas the current stream may be changed by + /// StreamGuard. + explicit Stream(Default, Device device) : device_(device), id_(0) {} + + bool operator==(const Stream& other) const noexcept { + return this->device_ == other.device_ && this->id_ == other.id_; + } + bool operator!=(const Stream& other) const noexcept { + return !(*this == other); + } + + Device device() const noexcept { + return device_; + } + DeviceType device_type() const noexcept { + return device_.type(); + } + DeviceIndex device_index() const noexcept { + return device_.index(); + } + StreamId id() const noexcept { + return id_; + } + + // Enqueues a wait instruction in the stream's work queue. + // This instruction is a no-op unless the event is marked + // for recording. In that case the stream stops processing + // until the event is recorded. + template + void wait(const T& event) const { + event.block(*this); + } + + // Return whether all asynchronous work previously enqueued on this stream + // has completed running on the device. + bool query() const; + + // Wait (by blocking the calling thread) until all asynchronous work enqueued + // on this stream has completed running on the device. + void synchronize() const; + + // The purpose of this function is to more conveniently permit binding + // of Stream to and from Python. Without packing, I have to setup a whole + // class with two fields (device and stream id); with packing I can just + // store a single uint64_t. + // + // The particular way we pack streams into a uint64_t is considered an + // implementation detail and should not be relied upon. + uint64_t hash() const noexcept { + // Concat these together into a 64-bit integer + uint64_t bits = static_cast(device_type()) << 56 | + static_cast(device_index()) << 48 | + // Remove the sign extension part of the 64-bit address because + // the id might be used to hold a pointer. + (static_cast(id()) & ((1ull << 48) - 1)); + return bits; + } + + struct StreamData3 pack3() const { + return {id(), device_index(), device_type()}; + } + + static Stream unpack3( + StreamId stream_id, + DeviceIndex device_index, + DeviceType device_type) { + TORCH_CHECK(isValidDeviceType(device_type)); + return Stream(UNSAFE, Device(device_type, device_index), stream_id); + } + + // I decided NOT to provide setters on this class, because really, + // why would you change the device of a stream? Just construct + // it correctly from the beginning dude. 
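+
+  // Round-trip sketch for the packing helpers above (s is any valid Stream):
+  //
+  //   StreamData3 d = s.pack3();
+  //   Stream s2 = Stream::unpack3(d.stream_id, d.device_index, d.device_type);
+  //   TORCH_INTERNAL_ASSERT(s2 == s);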
+}; + +C10_API std::ostream& operator<<(std::ostream& stream, const Stream& s); + +} // namespace c10 + +namespace std { +template <> +struct hash { + size_t operator()(c10::Stream s) const noexcept { + return std::hash{}(s.hash()); + } +}; +} // namespace std diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/StreamGuard.h b/phivenv/Lib/site-packages/torch/include/c10/core/StreamGuard.h new file mode 100644 index 0000000000000000000000000000000000000000..ab79f7e9bcc8aca4eb50945d4722bca18ddf820b --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/StreamGuard.h @@ -0,0 +1,173 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace c10 { + +/** + * A StreamGuard is an RAII class that changes the current device + * to the device corresponding to some stream, and changes the + * default stream on that device to be this stream. + * + * Use of StreamGuard is HIGHLY discouraged in operator definitions. In + * a single operator, you probably don't know enough about the global + * state of the world to profitably decide how to set streams. Let + * the caller handle this appropriately, and just use the current stream + * in your operator code. + * + * This StreamGuard does NOT have an uninitialized state; it is guaranteed + * to reset the stream and device on exit. If you are in a situation + * where you *might* want to setup a stream guard, see OptionalStreamGuard. + */ +struct StreamGuard { + /// No default constructor, see Note [Omitted default constructor from RAII] + explicit StreamGuard() = delete; + ~StreamGuard() = default; + + /// Set the current device to the device associated with the passed stream, + /// and set the current stream on that device to the passed stream. + explicit StreamGuard(Stream stream) : guard_(stream) {} + + /// Copy is disallowed + StreamGuard(const StreamGuard&) = delete; + StreamGuard& operator=(const StreamGuard&) = delete; + + /// Move is disallowed, as StreamGuard does not have an uninitialized state, + /// which is required for moves on types with nontrivial destructors. + StreamGuard(StreamGuard&& other) = delete; + StreamGuard& operator=(StreamGuard&& other) = delete; + + /// Resets the currently set stream to the original stream and + /// the currently set device to the original device. Then, + /// set the current device to the device associated with the passed stream, + /// and set the current stream on that device to the passed stream. + /// + /// NOTE: this implementation may skip some stream/device setting if + /// it can prove that it is unnecessary. + /// + /// WARNING: reset_stream does NOT preserve previously set streams on + /// different devices. If you need to set streams on multiple devices + /// on , use MultiStreamGuard instead. + void reset_stream(Stream stream) { + guard_.reset_stream(stream); + } + + /// Returns the stream that was set at the time the guard was constructed. + Stream original_stream() const { + return guard_.original_stream(); + } + + /// Returns the most recent stream that was set using this device guard, + /// either from construction, or via set_stream. + Stream current_stream() const { + return guard_.current_stream(); + } + + /// Returns the most recent device that was set using this device guard, + /// either from construction, or via set_device/reset_device/set_index. 
+ Device current_device() const { + return guard_.current_device(); + } + + /// Returns the device that was set at the most recent reset_stream(), + /// or otherwise the device at construction time. + Device original_device() const { + return guard_.original_device(); + } + + private: + c10::impl::InlineStreamGuard guard_; +}; + +/** + * An OptionalStreamGuard is an RAII class that sets a device to some value on + * initialization, and resets the device to its original value on destruction. + * See OptionalDeviceGuard for more guidance on how to use this class. + */ +struct OptionalStreamGuard { + /// Create an uninitialized guard. + explicit OptionalStreamGuard() = default; + + /// Set the current device to the device associated with the passed stream, + /// and set the current stream on that device to the passed stream. + explicit OptionalStreamGuard(Stream stream) : guard_(stream) {} + + /// Set the current device to the device associated with the passed stream, + /// and set the current stream on that device to the passed stream, + /// if the passed stream is not nullopt. + explicit OptionalStreamGuard(std::optional stream_opt) + : guard_(stream_opt) {} + + /// Copy is disallowed + OptionalStreamGuard(const OptionalStreamGuard&) = delete; + OptionalStreamGuard& operator=(const OptionalStreamGuard&) = delete; + + // See Note [Move construction for RAII guards is tricky] + OptionalStreamGuard(OptionalStreamGuard&& other) = delete; + + // See Note [Move assignment for RAII guards is tricky] + OptionalStreamGuard& operator=(OptionalStreamGuard&& other) = delete; + ~OptionalStreamGuard() = default; + + /// Resets the currently set stream to the original stream and + /// the currently set device to the original device. Then, + /// set the current device to the device associated with the passed stream, + /// and set the current stream on that device to the passed stream. + /// Initializes the guard if it was not previously initialized. + void reset_stream(Stream stream) { + guard_.reset_stream(stream); + } + + /// Returns the stream that was set at the time the guard was most recently + /// initialized, or nullopt if the guard is uninitialized. + std::optional original_stream() const { + return guard_.original_stream(); + } + + /// Returns the most recent stream that was set using this stream guard, + /// either from construction, or via reset_stream, if the guard is + /// initialized, or nullopt if the guard is uninitialized. + std::optional current_stream() const { + return guard_.current_stream(); + } + + /// Restore the original device and stream, resetting this guard to + /// uninitialized state. + void reset() { + guard_.reset(); + } + + private: + c10::impl::InlineOptionalStreamGuard guard_{}; +}; + +/** + * A MultiStreamGuard is an RAII class that sets the current streams of a set of + * devices all at once, and resets them to their original values on destruction. + */ +struct MultiStreamGuard { + /// Set the current streams to the passed streams on each of their respective + /// devices. 
+ explicit MultiStreamGuard(ArrayRef streams) : guard_(streams) {} + + /// Copy is disallowed + MultiStreamGuard(const MultiStreamGuard&) = delete; + MultiStreamGuard& operator=(const MultiStreamGuard&) = delete; + + // See Note [Move construction for RAII guards is tricky] + MultiStreamGuard(MultiStreamGuard&& other) = delete; + + // See Note [Move assignment for RAII guards is tricky] + MultiStreamGuard& operator=(MultiStreamGuard&& other) = delete; + ~MultiStreamGuard() = default; + + private: + c10::impl::InlineMultiStreamGuard guard_; +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/SymBool.h b/phivenv/Lib/site-packages/torch/include/c10/core/SymBool.h new file mode 100644 index 0000000000000000000000000000000000000000..acee686628f95cab6d41757d44a1d6739b16d8c8 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/SymBool.h @@ -0,0 +1,173 @@ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace c10 { + +class C10_API SymBool { + public: + /*implicit*/ SymBool(bool b) : data_(b) {} + SymBool(SymNode ptr) : data_(false), ptr_(std::move(ptr)) { + TORCH_CHECK(ptr_->is_bool()); + } + SymBool() : data_(false) {} + + SymNodeImpl* toSymNodeImplUnowned() const { + return ptr_.get(); + } + + SymNodeImpl* release() && { + return std::move(ptr_).release(); + } + + // Only valid if is_heap_allocated() + SymNode toSymNodeImpl() const; + + // Guaranteed to return a SymNode, wrapping using base if necessary + SymNode wrap_node(const SymNode& base) const; + + bool expect_bool() const { + std::optional c = maybe_as_bool(); + TORCH_CHECK(c.has_value()); + return *c; + } + + SymBool sym_and(const SymBool&) const; + SymBool sym_or(const SymBool&) const; + SymBool sym_not() const; + + SymBool operator&(const SymBool& other) const { + return sym_and(other); + } + SymBool operator|(const SymBool& other) const { + return sym_or(other); + } + SymBool operator||(const SymBool& other) const { + return sym_or(other); + } + SymBool operator~() const { + return sym_not(); + } + + // Insert a guard for the bool to be its concrete value, and then return + // that value. Note that C++ comparison operations default to returning + // bool, so it's not so common to have to call this + bool guard_bool(const char* file, int64_t line) const; + bool expect_true(const char* file, int64_t line) const; + bool guard_size_oblivious(const char* file, int64_t line) const; + bool statically_known_true(const char* file, int64_t line) const; + bool guard_or_false(const char* file, int64_t line) const; + bool guard_or_true(const char* file, int64_t line) const; + + bool has_hint() const; + + bool as_bool_unchecked() const { + return data_; + } + + std::optional maybe_as_bool() const { + if (!is_heap_allocated()) { + return data_; + } + return toSymNodeImplUnowned()->constant_bool(); + } + + bool is_heap_allocated() const { + return ptr_; + } + + private: + // TODO: optimize to union + bool data_; + SymNode ptr_; +}; + +C10_API std::ostream& operator<<(std::ostream& os, const SymBool& s); + +#define TORCH_SYM_CHECK(cond, ...) \ + TORCH_CHECK((cond).expect_true(__FILE__, __LINE__), __VA_ARGS__) +#define TORCH_SYM_INTERNAL_ASSERT(cond, ...) \ + TORCH_INTERNAL_ASSERT((cond).expect_true(__FILE__, __LINE__), __VA_ARGS__) +#define TORCH_MAYBE_SYM_CHECK(cond, ...) 
\ + if constexpr (std::is_same_v, SymBool>) { \ + TORCH_CHECK((cond).expect_true(__FILE__, __LINE__), __VA_ARGS__) \ + } else { \ + TORCH_CHECK((cond), __VA_ARGS__) \ + } + +inline bool guard_size_oblivious( + bool b, + const char* file [[maybe_unused]], + int64_t line [[maybe_unused]]) { + return b; +} + +inline bool guard_size_oblivious( + const c10::SymBool& b, + const char* file, + int64_t line) { + return b.guard_size_oblivious(file, line); +} + +inline bool guard_or_false( + bool b, + const char* file [[maybe_unused]], + int64_t line [[maybe_unused]]) { + return b; +} + +inline bool guard_or_false( + const c10::SymBool& b, + const char* file, + int64_t line) { + return b.guard_or_false(file, line); +} + +inline bool statically_known_true( + bool b, + const char* file [[maybe_unused]], + int64_t line [[maybe_unused]]) { + return b; +} + +inline bool statically_known_true( + const c10::SymBool& b, + const char* file, + int64_t line) { + return b.statically_known_true(file, line); +} + +inline bool guard_or_true( + bool b, + const char* file [[maybe_unused]], + int64_t line [[maybe_unused]]) { + return b; +} + +inline bool guard_or_true( + const c10::SymBool& b, + const char* file, + int64_t line) { + return b.guard_or_true(file, line); +} + +#define TORCH_GUARD_SIZE_OBLIVIOUS(cond) \ + c10::guard_size_oblivious((cond), __FILE__, __LINE__) + +#define TORCH_STATICALLY_KNOWN_TRUE(cond) \ + c10::statically_known_true((cond), __FILE__, __LINE__) + +#define TORCH_GUARD_OR_FALSE(cond) \ + c10::guard_or_false((cond), __FILE__, __LINE__) + +#define TORCH_GUARD_OR_TRUE(cond) c10::guard_or_true((cond), __FILE__, __LINE__) + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/SymFloat.h b/phivenv/Lib/site-packages/torch/include/c10/core/SymFloat.h new file mode 100644 index 0000000000000000000000000000000000000000..d28c30bac4856af2350c65210d1f130fdbbf7c69 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/SymFloat.h @@ -0,0 +1,118 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace c10 { + +// NB: this is actually double precision; we're using the Python naming here +class C10_API SymFloat { + public: + /*implicit*/ SymFloat(double d) : data_(d) {} + SymFloat(SymNode ptr) + : data_(std::numeric_limits::quiet_NaN()), ptr_(std::move(ptr)) { + TORCH_CHECK(ptr_->is_float()); + } + SymFloat() : data_(0.0) {} + + SymNodeImpl* toSymNodeImplUnowned() const { + return ptr_.get(); + } + + SymNodeImpl* release() && { + return std::move(ptr_).release(); + } + + // Only valid if is_symbolic() + SymNode toSymNodeImpl() const; + + // Guaranteed to return a SymNode, wrapping using base if necessary + SymNode wrap_node(const SymNode& base) const; + + double expect_float() const { + TORCH_CHECK(!is_symbolic()); + return data_; + } + + SymFloat operator+(const SymFloat&) const; + SymFloat operator-(const SymFloat&) const; + SymFloat operator*(const SymFloat&) const; + SymFloat operator/(const SymFloat&) const; + + SymBool sym_eq(const SymFloat&) const; + SymBool sym_ne(const SymFloat&) const; + SymBool sym_lt(const SymFloat&) const; + SymBool sym_le(const SymFloat&) const; + SymBool sym_gt(const SymFloat&) const; + SymBool sym_ge(const SymFloat&) const; + + bool operator==(const SymFloat& o) const { + return sym_eq(o).guard_bool(__FILE__, __LINE__); + } + bool operator!=(const SymFloat& o) const { + return sym_ne(o).guard_bool(__FILE__, __LINE__); + } + bool operator<(const SymFloat& o) 
const { + return sym_lt(o).guard_bool(__FILE__, __LINE__); + } + bool operator<=(const SymFloat& o) const { + return sym_le(o).guard_bool(__FILE__, __LINE__); + } + bool operator>(const SymFloat& o) const { + return sym_gt(o).guard_bool(__FILE__, __LINE__); + } + bool operator>=(const SymFloat& o) const { + return sym_ge(o).guard_bool(__FILE__, __LINE__); + } + + SymFloat min(const SymFloat& sci) const; + SymFloat max(const SymFloat& sci) const; + + // Need guidance on where to put this code + SymFloat sqrt() const; + + // Insert a guard for the float to be its concrete value, and then return + // that value. This operation always works, even if the float is symbolic, + // so long as we know what the underlying value is. Don't blindly put this + // everywhere; you can cause overspecialization of PyTorch programs with + // this method. + // + // It should be called as guard_float(__FILE__, __LINE__). The file and line + // number can be used to diagnose overspecialization. + double guard_float(const char* file, int64_t line) const; + + bool has_hint() const; + + // N.B. It's important to keep this definition in the header + // as we expect if checks to be folded for mobile builds + // where `is_symbolic` is always false + C10_ALWAYS_INLINE bool is_symbolic() const { + return ptr_; + } + + // UNSAFELY coerce this SymFloat into a double. You MUST have + // established that this is a non-symbolic by some other means, + // typically by having tested is_symbolic(). You will get garbage + // from this function if is_symbolic() + double as_float_unchecked() const { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!is_symbolic()); + return data_; + } + + private: + // TODO: optimize to union + double data_; + SymNode ptr_; +}; + +C10_API std::ostream& operator<<(std::ostream& os, const SymFloat& s); +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/SymInt.h b/phivenv/Lib/site-packages/torch/include/c10/core/SymInt.h new file mode 100644 index 0000000000000000000000000000000000000000..1c92fc0a291320c57ff731a7531186ea75bf77ac --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/SymInt.h @@ -0,0 +1,422 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace c10 { + +class SymFloat; + +// SymInt represents either a regular int64_t, or a symbolic integer +// (represented in a type erased way as SymNode). The intention is for SymInt +// to represent symbolic sizes that arise when doing shape computation in +// operator kernels. This allows for tracing through programs without baking in +// concrete sizes into kernel calls. +// +// SymInt has an API equivalent to int64_t. In particular, it is a value type. +// Internally, SymInt is represented in a clever packed way, so that it only +// occupies one word of space; but morally, it is a union between an int64_t +// and an intrusive pointer to SymNodeImpl. 
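+//
+// An illustrative sketch (not part of this header): kernel-side shape math
+// stays symbolic simply by computing with SymInt values instead of int64_t:
+//
+//   c10::SymInt padded_len(const c10::SymInt& n, const c10::SymInt& pad) {
+//     return n + 2 * pad; // remains symbolic whenever n or pad is symbolic
+//   }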
+// +// Invariant: the referenced SymNodeImpl is guaranteed to be a SymNode where +// is_int() returns true + +class C10_API SymInt { + public: + enum Unchecked { + UNCHECKED, + }; + + /*implicit*/ SymInt(int64_t d) : data_(d) { + if (is_heap_allocated()) { + // Large negative number, heap allocate it + promote_to_negative(); + } + } + SymInt() : data_(0) {} + SymInt(SymNode n); + + // unchecked c-tor accepting raw `data_` + // One appropriate use for this is when you are constructing a symint + // in a situation where you know it is non-negative (or, if it is negative, + // the negative value is -1; i.e., not user controlled) + SymInt(Unchecked, int64_t d) : data_(d) {} + + // TODO: these implementations are not optimal because they allocate a + // temporary and then use the move constructor/assignment + SymInt(const SymInt& s) : data_(0) { + if (s.is_heap_allocated()) { + *this = SymInt(s.toSymNode()); + } else { + data_ = s.data_; + } + } + SymInt(SymInt&& s) noexcept : data_(s.data_) { + s.data_ = 0; + } + + SymInt& operator=(const SymInt& s) { + if (this != &s) { + if (s.is_heap_allocated()) { + *this = SymInt(s.toSymNode()); + } else { + data_ = s.data_; + } + } + return *this; + } + SymInt& operator=(SymInt&& s) noexcept { + if (this != &s) { + release_(); // release the current SymNode if any + data_ = s.data_; + if (s.is_heap_allocated()) + s.data_ = 0; + }; + return *this; + } + + SymNodeImpl* toSymNodeImplUnowned() const { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(is_heap_allocated()); + uint64_t unextended_bits = static_cast(data_) & ~MASK; + uint64_t sign_bit_mask = 1ULL << (62 - 1); + // https://stackoverflow.com/questions/42534749/signed-extension-from-24-bit-to-32-bit-in-c + uint64_t extended_bits = (unextended_bits ^ sign_bit_mask) - sign_bit_mask; + return static_cast( + // NOLINTNEXTLINE(performance-no-int-to-ptr, bugprone*) + reinterpret_cast(static_cast(extended_bits))); + } + + void release_() { + if (is_heap_allocated()) { + SymNode::reclaim(toSymNodeImplUnowned()); // steal + } + } + + SymNodeImpl* release() && { +#ifndef C10_MOBILE + TORCH_INTERNAL_ASSERT(is_heap_allocated()); + auto* r = toSymNodeImplUnowned(); + data_ = 0; // transfer ownership + return r; +#else + TORCH_INTERNAL_ASSERT(false); +#endif + } + + // Only valid if is_heap_allocated() + SymNode toSymNode() const; + + // Guaranteed to return a SymNode, wrapping using base if necessary + SymNode wrap_node(const SymNode& base) const; + + ~SymInt() { + release_(); + } + + // Require the int to be non-symbolic, and if it is symbolic raise an + // error. This is safe to use for C++ code that doesn't work for symbolic + // shapes, and you don't have time to fix it immediately, as if we + // try to trigger the path in C++ you'll appropriately get an error + int64_t expect_int() const { + if (auto r = maybe_as_int()) { + return *r; + } + TORCH_CHECK_ALWAYS_SHOW_CPP_STACKTRACE( + false, "when unpacking SymInt, expected int but got ", *this); + } + + // Test if we have a hint for this int (e.g., guard_int would work). + // Most of the time this is true; it is only false when you have + // an unbacked SymInt. + bool has_hint() const; + + // Insert a guard for the int to be its concrete value, and then return + // that value. This operation always works, even if the int is symbolic, + // so long as we know what the underlying value is (e.g., this won't work + // if you call it on the size of nonzero output). 
Don't blindly put this + // everywhere; you can cause overspecialization of PyTorch programs with + // this method. + // + // It should be called as guard_int(__FILE__, __LINE__). The file and line + // number can be used to diagnose overspecialization. + int64_t guard_int(const char* file, int64_t line) const; + + // Insert a guard that this SymInt must be size-like, returning true if + // the integer actually is >= 0. Unlike manually performing a >= 0 test, + // if the SymInt in question is an unbacked SymInt (or, potentially in the + // future, if it contains unbacked SymInts), we will also treat the + // unbacked SymInt as statically testing >= 2 (which will prevent us from + // choking on, e.g., contiguity checks.) + bool expect_size(const char* file, int64_t line) const; + + // Distinguish actual symbolic values from constants stored on the heap + bool is_symbolic() const { + return is_heap_allocated() && + !toSymNodeImplUnowned()->constant_int().has_value(); + } + + // N.B. It's important to keep this definition in the header + // as we expect if checks to be folded for mobile builds + // where `is_heap_allocated` is always false and optimize dead code paths + C10_ALWAYS_INLINE bool is_heap_allocated() const { +#ifdef C10_MOBILE + return false; +#else + return !check_range(data_); +#endif + } + + SymInt operator+(const SymInt& sci) const; + SymInt operator-(const SymInt& sci) const; + SymInt operator*(const SymInt& sci) const; + SymInt operator/(const SymInt& sci) const; + SymInt operator%(const SymInt& sci) const; + void operator*=(const SymInt& sci); + void operator+=(const SymInt& sci); + void operator/=(const SymInt& sci); + + SymInt clone() const; + + SymBool sym_eq(const SymInt&) const; + SymBool sym_ne(const SymInt&) const; + SymBool sym_lt(const SymInt&) const; + SymBool sym_le(const SymInt&) const; + SymBool sym_gt(const SymInt&) const; + SymBool sym_ge(const SymInt&) const; + + bool operator==(const SymInt& o) const { + return sym_eq(o).guard_bool(__FILE__, __LINE__); + } + bool operator!=(const SymInt& o) const { + return sym_ne(o).guard_bool(__FILE__, __LINE__); + } + bool operator<(const SymInt& o) const { + return sym_lt(o).guard_bool(__FILE__, __LINE__); + } + bool operator<=(const SymInt& o) const { + return sym_le(o).guard_bool(__FILE__, __LINE__); + } + bool operator>(const SymInt& o) const { + return sym_gt(o).guard_bool(__FILE__, __LINE__); + } + bool operator>=(const SymInt& o) const { + return sym_ge(o).guard_bool(__FILE__, __LINE__); + } + + SymInt min(const SymInt& sci) const; + SymInt max(const SymInt& sci) const; + + // If both are symbolic, this checks if + // they share the same node. + // If both are not symbolic this just checks normal equality. + bool is_same(const SymInt& other) const; + + operator SymFloat() const; + + void unsafe_set_data(size_t nbytes) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!is_heap_allocated()); + data_ = static_cast(nbytes); + } + + // Don't use this. Prefer maybe_as_int instead + int64_t as_int_unchecked() const { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!is_heap_allocated()); + return data_; + } + + std::optional maybe_as_int() const { + if (!is_heap_allocated()) { + return data_; + } + auto* node = toSymNodeImplUnowned(); + if (auto c = node->constant_int()) { + return c; + } + return node->maybe_as_int(); + } + + // Return whether the integer is directly coercible to a SymInt + // without requiring heap allocation. 
You don't need to use this + // to check if you can pass an integer to SymInt; this is guaranteed + // to work (it just might heap allocate!) + static bool check_range(int64_t i) { + return i > MAX_UNREPRESENTABLE_INT; + } + + // Return the min representable integer as a SymInt without + // heap allocation. For quantities that count bytes (or larger), + // this is still much larger than you need, so you may consider + // using this as a more efficient version of MIN_INT + static constexpr int64_t min_representable_int() { + return MAX_UNREPRESENTABLE_INT + 1; + } + + private: + void promote_to_negative(); + + // Constraints on the internal representation: + // + // - Should represent positive and small negative ints + // - No conversion necessary for operations on ints + // - Must represent valid 64-bit pointers + // - Is symbolic test should be FAST (two arithmetic instructions is too + // much). + // This code being a hotpath is based on Strobelight profiles of + // is_heap_allocated(). FB only: https://fburl.com/strobelight/5l50ncxd + // (you will need to change the time window). + // + // So, the scheme is to reserve large negative numbers (assuming + // two's complement): + // + // - 0b0.... means we are a positive int + // - 0b11... means we are a small negative int + // - 0b10... means we are are a pointer. This means that + // [-2^63, -2^62-1] are not representable as ints. + // We don't actually need all of this space as on x86_64 + // as the top 16bits aren't used for anything + static constexpr uint64_t MASK = 1ULL << 63 | 1ULL << 62 | 1ULL << 61; + static constexpr uint64_t IS_SYM = 1ULL << 63 | 1ULL << 61; + // We must manually translate the bit pattern test into a greater + // than test because compiler doesn't figure it out: + // https://godbolt.org/z/356aferaW + static constexpr int64_t MAX_UNREPRESENTABLE_INT = + -1LL & static_cast(~(1ULL << 62)); + int64_t data_; +}; + +/// Sum of a list of SymInt; accumulates into the c10::SymInt expression +template < + typename C, + typename std::enable_if_t< + std::is_same_v, + int> = 0> +inline c10::SymInt multiply_integers(const C& container) { + return std::accumulate( + container.begin(), + container.end(), + c10::SymInt(1), + [](const c10::SymInt& a, const c10::SymInt& b) { return a * b; }); +} + +template < + typename Iter, + typename = std::enable_if_t::value_type, + c10::SymInt>>> +inline c10::SymInt multiply_integers(Iter begin, Iter end) { + return std::accumulate( + begin, + end, + c10::SymInt(1), + [](const c10::SymInt& a, const c10::SymInt& b) { return a * b; }); +} + +#define DECLARE_SYMINT_OP_INTONLY(scalar_t, RetTy) \ + C10_API RetTy operator%(const SymInt& a, scalar_t b); \ + C10_API RetTy operator%(scalar_t a, const SymInt& b); + +#define DECLARE_SYMINT_OP(scalar_t, RetTy) \ + C10_API RetTy operator+(const SymInt& a, scalar_t b); \ + C10_API RetTy operator-(const SymInt& a, scalar_t b); \ + C10_API RetTy operator*(const SymInt& a, scalar_t b); \ + C10_API RetTy operator/(const SymInt& a, scalar_t b); \ + C10_API RetTy operator+(scalar_t a, const SymInt& b); \ + C10_API RetTy operator-(scalar_t a, const SymInt& b); \ + C10_API RetTy operator*(scalar_t a, const SymInt& b); \ + C10_API RetTy operator/(scalar_t a, const SymInt& b); \ + C10_API bool operator==(const SymInt& a, scalar_t b); \ + C10_API bool operator!=(const SymInt& a, scalar_t b); \ + C10_API bool operator<(const SymInt& a, scalar_t b); \ + C10_API bool operator<=(const SymInt& a, scalar_t b); \ + C10_API bool operator>(const SymInt& a, scalar_t b); \ + 
C10_API bool operator>=(const SymInt& a, scalar_t b); \ + C10_API bool operator==(scalar_t a, const SymInt& b); \ + C10_API bool operator!=(scalar_t a, const SymInt& b); \ + C10_API bool operator<(scalar_t a, const SymInt& b); \ + C10_API bool operator<=(scalar_t a, const SymInt& b); \ + C10_API bool operator>(scalar_t a, const SymInt& b); \ + C10_API bool operator>=(scalar_t a, const SymInt& b); + +DECLARE_SYMINT_OP_INTONLY(int64_t, SymInt) +DECLARE_SYMINT_OP_INTONLY(int32_t, SymInt) +DECLARE_SYMINT_OP_INTONLY(uint64_t, SymInt) +DECLARE_SYMINT_OP_INTONLY(uint32_t, SymInt) +DECLARE_SYMINT_OP(int64_t, SymInt) +DECLARE_SYMINT_OP(int32_t, SymInt) // make sure constants work +DECLARE_SYMINT_OP(uint64_t, SymInt) +DECLARE_SYMINT_OP(uint32_t, SymInt) +DECLARE_SYMINT_OP(double, SymFloat) +DECLARE_SYMINT_OP(float, SymFloat) // just for completeness + +// On OSX size_t is different than uint64_t so we have to +// define it separately +#if defined(__APPLE__) +DECLARE_SYMINT_OP_INTONLY(size_t, SymInt) +DECLARE_SYMINT_OP(size_t, SymInt) +#endif + +#undef DECLARE_SYMINT_OP + +C10_API std::ostream& operator<<(std::ostream& os, const SymInt& s); +C10_API SymInt operator-(const SymInt& s); + +inline bool sym_eq(int64_t a, int64_t b) { + return a == b; +} + +inline SymBool sym_eq(const SymInt& a, const SymInt& b) { + return a.sym_eq(b); +} + +inline bool sym_ne(int64_t a, int64_t b) { + return a != b; +} + +inline SymBool sym_ne(const SymInt& a, const SymInt& b) { + return a.sym_ne(b); +} + +inline bool sym_lt(int64_t a, int64_t b) { + return a < b; +} + +inline SymBool sym_lt(const SymInt& a, const SymInt& b) { + return a.sym_lt(b); +} + +inline bool sym_le(int64_t a, int64_t b) { + return a <= b; +} + +inline SymBool sym_le(const SymInt& a, const SymInt& b) { + return a.sym_le(b); +} + +inline bool sym_gt(int64_t a, int64_t b) { + return a > b; +} + +inline SymBool sym_gt(const SymInt& a, const SymInt& b) { + return a.sym_gt(b); +} + +inline bool sym_ge(int64_t a, int64_t b) { + return a >= b; +} + +inline SymBool sym_ge(const SymInt& a, const SymInt& b) { + return a.sym_ge(b); +} + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/SymIntArrayRef.h b/phivenv/Lib/site-packages/torch/include/c10/core/SymIntArrayRef.h new file mode 100644 index 0000000000000000000000000000000000000000..3e9cd1aa91fefd4e0953e51cc6a106c2fff47211 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/SymIntArrayRef.h @@ -0,0 +1,89 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace c10 { +using SymIntArrayRef = ArrayRef; + +inline at::IntArrayRef asIntArrayRefUnchecked(c10::SymIntArrayRef ar) { + return IntArrayRef(reinterpret_cast(ar.data()), ar.size()); +} + +// TODO: a SymIntArrayRef containing a heap allocated large negative integer +// can actually technically be converted to an IntArrayRef... but not with +// the non-owning API we have here. We can't reinterpet cast; we have to +// allocate another buffer and write the integers into it. If you need it, +// we can do it. But I don't think you need it. 
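+
+// A usage sketch for the helpers below (illustrative; `sym_sizes` is an
+// assumed argument, and the conversion throws if any element is symbolic):
+//
+//   int64_t concrete_numel(c10::SymIntArrayRef sym_sizes) {
+//     at::IntArrayRef sizes = C10_AS_INTARRAYREF_SLOW(sym_sizes);
+//     int64_t n = 1;
+//     for (int64_t s : sizes) {
+//       n *= s;
+//     }
+//     return n;
+//   }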
+ +inline std::optional asIntArrayRefSlowOpt( + c10::SymIntArrayRef ar) { + for (const c10::SymInt& sci : ar) { + if (sci.is_heap_allocated()) { + return std::nullopt; + } + } + + return {asIntArrayRefUnchecked(ar)}; +} + +inline at::IntArrayRef asIntArrayRefSlow( + c10::SymIntArrayRef ar, + const char* file, + int64_t line) { + for (const c10::SymInt& sci : ar) { + TORCH_CHECK( + !sci.is_heap_allocated(), + file, + ":", + line, + ": SymIntArrayRef expected to contain only concrete integers"); + } + return asIntArrayRefUnchecked(ar); +} + +// Even slower than asIntArrayRefSlow, as it forces an allocation for a +// destination int, BUT it is able to force specialization (it never errors) +inline c10::DimVector asIntArrayRefSlowAlloc( + c10::SymIntArrayRef ar, + const char* file, + int64_t line) { + c10::DimVector res(ar.size(), 0); + for (const auto i : c10::irange(ar.size())) { + res[i] = ar[i].guard_int(file, line); + } + return res; +} + +#define C10_AS_INTARRAYREF_SLOW(a) c10::asIntArrayRefSlow(a, __FILE__, __LINE__) +#define C10_AS_INTARRAYREF_SLOW_ALLOC(a) \ + c10::asIntArrayRefSlowAlloc(a, __FILE__, __LINE__) + +// Prefer using a more semantic constructor, like +// fromIntArrayRefKnownNonNegative +inline SymIntArrayRef fromIntArrayRefUnchecked(IntArrayRef array_ref) { + return SymIntArrayRef( + reinterpret_cast(array_ref.data()), array_ref.size()); +} + +inline SymIntArrayRef fromIntArrayRefKnownNonNegative(IntArrayRef array_ref) { + return fromIntArrayRefUnchecked(array_ref); +} + +inline SymIntArrayRef fromIntArrayRefSlow(IntArrayRef array_ref) { + for (long i : array_ref) { + TORCH_CHECK( + SymInt::check_range(i), + "IntArrayRef contains an int that cannot be represented as a SymInt: ", + i); + } + return SymIntArrayRef( + reinterpret_cast(array_ref.data()), array_ref.size()); +} + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/SymNodeImpl.h b/phivenv/Lib/site-packages/torch/include/c10/core/SymNodeImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..de0768de8563128e99da0719e04b511f9c4aa880 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/SymNodeImpl.h @@ -0,0 +1,261 @@ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter") + +namespace c10 { + +class SymNodeImpl; +using SymNode = c10::intrusive_ptr; + +// When you add a method, you also need to edit +// torch/csrc/jit/python/init.cpp +// torch/csrc/utils/python_symnode.h +// c10/core/ConstantSymNodeImpl.h +class C10_API SymNodeImpl : public c10::intrusive_ptr_target { + public: + ~SymNodeImpl() override = default; + + template + c10::intrusive_ptr dyn_cast() const { + return c10::intrusive_ptr::reclaim_copy(dynamic_cast(this)); + } + + // these could be pure virtual when we implement LTC versions + virtual bool is_int() { + TORCH_CHECK(false, "NYI"); + } + virtual bool is_bool() { + TORCH_CHECK(false, "NYI"); + } + virtual bool is_float() { + TORCH_CHECK(false, "NYI"); + } + virtual bool is_nested_int() const { + return false; + } + virtual SymNode add(const SymNode& other) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode sub(const SymNode& other) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode mul(const SymNode& other) { + TORCH_CHECK(false, "NYI"); + } + // NB: legacy, prefer float_truediv or int_truediv + virtual SymNode truediv(const SymNode& other) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode float_truediv(const SymNode& 
other) { + return truediv(other); + } + virtual SymNode int_truediv(const SymNode& other) { + return truediv(other); + } + // NB: legacy, prefer float_pow or pow_by_natural + virtual SymNode pow(const SymNode& other) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode float_pow(const SymNode& other) { + return pow(other); + } + virtual SymNode pow_by_natural(const SymNode& other) { + return pow(other); + } + // NB: legacy, prefer int_floordiv + virtual SymNode floordiv(const SymNode& other) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode int_floordiv(const SymNode& other) { + return floordiv(other); + } + virtual SymNode mod(const SymNode& other) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode eq(const SymNode& other) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode ne(const SymNode& other) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode gt(const SymNode& other) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode lt(const SymNode& other) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode le(const SymNode& other) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode ge(const SymNode& other) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode ceil() { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode floor() { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode neg() { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode sym_min(const SymNode& other) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode sym_max(const SymNode& other) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode sym_or(const SymNode& other) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode sym_and(const SymNode& other) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode sym_not() { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode sym_ite(const SymNode& then_val, const SymNode& else_val) { + TORCH_CHECK(false, "NYI"); + } + // NB: self is ignored here, only the arguments are used + virtual SymNode is_contiguous( + ArrayRef sizes, + ArrayRef strides) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode is_channels_last_contiguous_2d( + ArrayRef sizes, + ArrayRef strides) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode is_channels_last_contiguous_3d( + ArrayRef sizes, + ArrayRef strides) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode is_channels_last_strides_2d( + ArrayRef sizes, + ArrayRef strides) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode is_channels_last_strides_3d( + ArrayRef sizes, + ArrayRef strides) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode is_non_overlapping_and_dense( + ArrayRef sizes, + ArrayRef strides) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode clone() { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode sym_float() { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode wrap_int(int64_t num) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode wrap_float(double num) { + TORCH_CHECK(false, "NYI"); + } + virtual SymNode wrap_bool(bool num) { + TORCH_CHECK(false, "NYI"); + } + virtual int64_t guard_int(const char* file, int64_t line) { + TORCH_CHECK(false, "NYI"); + } + virtual bool guard_bool(const char* file, int64_t line) { + TORCH_CHECK(false, "NYI"); + } + virtual double guard_float(const char* file, int64_t line) { + TORCH_CHECK(false, "NYI"); + } + virtual bool guard_size_oblivious(const char* file, int64_t line) { + // No improvement for unbacked SymBools by default, replace this + // with a better implementation! 
+ return guard_bool(file, line); + } + virtual bool guard_or_false(const char* file, int64_t line) { + // Note: PT2 primarily uses PythonSymNodeImpl for this functionality. + // XLA is currently the main consumer of this fallback path since it uses + // ahead-of-time compilation and cannot depend on Python runtime. + return guard_bool(file, line); + } + virtual bool statically_known_true(const char* file, int64_t line) { + // Note: PT2 primarily uses PythonSymNodeImpl for this functionality. + // XLA is currently the main consumer of this fallback path since it uses + // ahead-of-time compilation and cannot depend on Python runtime. + return guard_bool(file, line); + } + virtual bool guard_or_true(const char* file, int64_t line) { + // Note: PT2 primarily uses PythonSymNodeImpl for this functionality. + // XLA is currently the main consumer of this fallback path since it uses + // ahead-of-time compilation and cannot depend on Python runtime. + return guard_bool(file, line); + } + virtual bool expect_true(const char* file, int64_t line) { + // No improvement for unbacked SymBools by default, replace this + // with a better implementation! + return guard_bool(file, line); + } + virtual bool expect_size(const char* file, int64_t line) { + // No improvement for unbacked SymInts by default, replace this + // with a better implementation! + return ge(wrap_int(0))->guard_bool(file, line); + } + virtual int64_t int_() { + TORCH_CHECK(false, "NYI"); + } + virtual bool bool_() { + TORCH_CHECK(false, "NYI"); + } + virtual bool has_hint() { + TORCH_CHECK(false, "NYI"); + } + virtual std::string str() { + TORCH_CHECK(false, "NYI"); + } + virtual std::string _graph_repr() { + return str(); + } + virtual std::optional nested_int() { + return std::nullopt; + } + virtual std::optional nested_int_coeff() { + return std::nullopt; + } + virtual std::optional constant_int() { + return std::nullopt; + } + virtual std::optional constant_bool() { + return std::nullopt; + } + virtual std::optional maybe_as_int() { + return std::nullopt; + } + virtual bool is_constant() { + return false; + } + virtual bool is_symbolic() { + return true; + } + std::ostream& operator<<(std::ostream& os) { + os << str(); + return os; + } +}; + +} // namespace c10 +C10_DIAGNOSTIC_POP() diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/SymbolicShapeMeta.h b/phivenv/Lib/site-packages/torch/include/c10/core/SymbolicShapeMeta.h new file mode 100644 index 0000000000000000000000000000000000000000..939bff6443f9c06f4b9e2ab2cf6bb10a2dfa4b02 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/SymbolicShapeMeta.h @@ -0,0 +1,218 @@ +#pragma once +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace c10 { + +class C10_API SymbolicShapeMeta { + public: + // Basic metadata from which other quantities are derived + SymDimVector sizes_ = {0}; + SymDimVector strides_ = {1}; + SymInt storage_offset_ = 0; + + bool strides_valid_ = true; // e.g. 
for sparse where there are no strides + + SymbolicShapeMeta() = default; + ~SymbolicShapeMeta() = default; + SymbolicShapeMeta(const SymbolicShapeMeta& other); + SymbolicShapeMeta(SymbolicShapeMeta&& other) = delete; + SymbolicShapeMeta& operator=(const SymbolicShapeMeta& other) = delete; + SymbolicShapeMeta& operator=(SymbolicShapeMeta&& other) = delete; + + void refresh_numel() { + // Non-const, don't need to hold mutables_ lock + available_.fetch_and(~numel_avail); + numel_ = 1; + } + + void refresh_contiguous() { + // Non-const, don't need to hold mutables_ lock + available_.fetch_and(numel_avail); + is_contiguous_ = false; + is_channels_last_contiguous_ = false; + is_channels_last_3d_contiguous_ = false; + is_channels_last_ = false; + is_channels_last_3d_ = false; + is_non_overlapping_and_dense_ = false; + } + + int64_t dim() const { + return static_cast(sizes_.size()); + } + + // Accessors for derived quantities, computed lazily on first access + + bool has_numel() const { + return available_.load() & numel_avail; + } + bool has_is_contiguous() const { + return available_.load() & is_contiguous_avail; + } + bool has_is_channels_last_contiguous() const { + return available_.load() & is_channels_last_contiguous_avail; + } + bool has_is_channels_last_3d_contiguous() const { + return available_.load() & is_channels_last_3d_contiguous_avail; + } + bool has_is_channels_last() const { + return available_.load() & is_channels_last_avail; + } + bool has_is_channels_last_3d() const { + return available_.load() & is_channels_last_3d_avail; + } + bool has_is_non_overlapping_and_dense() const { + return available_.load() & is_non_overlapping_and_dense_avail; + } + + // Accessors to cached derived properties + // DO NOT call with mutables_ lock held + const SymInt& numel() const { + if (C10_UNLIKELY(!has_numel())) { + init_numel(); + } + return numel_; + } + + const SymBool& is_contiguous() const { + if (C10_UNLIKELY(!has_is_contiguous())) { + init_is_contiguous(); + } + return is_contiguous_; + } + + const SymBool& is_channels_last_contiguous() const { + if (C10_UNLIKELY(!has_is_channels_last_contiguous())) { + init_is_channels_last_contiguous(); + } + return is_channels_last_contiguous_; + } + + const SymBool& is_channels_last_3d_contiguous() const { + if (C10_UNLIKELY(!has_is_channels_last_3d_contiguous())) { + init_is_channels_last_3d_contiguous(); + } + return is_channels_last_3d_contiguous_; + } + + const SymBool& is_channels_last() const { + if (C10_UNLIKELY(!has_is_channels_last())) { + init_is_channels_last(); + } + return is_channels_last_; + } + + const SymBool& is_channels_last_3d() const { + if (C10_UNLIKELY(!has_is_channels_last_3d())) { + init_is_channels_last_3d(); + } + return is_channels_last_3d_; + } + + const SymBool& is_non_overlapping_and_dense() const { + if (C10_UNLIKELY(!has_is_non_overlapping_and_dense())) { + init_is_non_overlapping_and_dense(); + } + return is_non_overlapping_and_dense_; + } + + // Assumptions so we can short-circuit computation + // NOTE: Don't need to lock mutables_ since these aren't const + void assume_contiguous(SymBool val = true) { + is_contiguous_ = std::move(val); + available_.fetch_or(is_contiguous_avail); + } + void assume_channels_last_contiguous(SymBool val = true) { + is_contiguous_ = std::move(val); + available_.fetch_or(is_channels_last_contiguous_avail); + } + void assume_channels_last_3d_contiguous(SymBool val = true) { + is_channels_last_3d_contiguous_ = std::move(val); + available_.fetch_or(is_channels_last_3d_contiguous_avail); + } 
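+
+  // A usage sketch (illustrative; assumes `meta` is a SymbolicShapeMeta&):
+  // code that already knows the layout of the tensor it is building can seed
+  // the cache up front and skip the lazy init_* recomputation entirely:
+  //
+  //   meta.assume_contiguous();               // marks is_contiguous_ available
+  //   bool cached = meta.has_is_contiguous(); // true; no init_is_contiguous()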
+ void assume_channels_last(SymBool val = true) { + is_channels_last_ = std::move(val); + available_.fetch_or(is_channels_last_avail); + } + void assume_channels_last_3d(SymBool val = true) { + is_channels_last_3d_ = std::move(val); + available_.fetch_or(is_channels_last_3d_avail); + } + void assume_non_overlapping_and_dense(SymBool val = true) { + is_non_overlapping_and_dense_ = std::move(val); + available_.fetch_or(is_non_overlapping_and_dense_avail); + } + + private: + SymBool compute_contiguous() const; + SymBool compute_channels_last_contiguous_2d() const; + SymBool compute_channels_last_contiguous_3d() const; + SymBool compute_strides_like_channels_last_2d() const; + SymBool compute_strides_like_channels_last_3d() const; + SymBool compute_non_overlapping_and_dense() const; + + // These are little wrappers over the real compute_ functions that + // can make use of other contiguity fields to short circuit. + // They need to be implemented separately for SymBool, as SymBool does + // not short circuit. + // TODO: should the SymBool cases avoid the short circuit? Need to reason + // if its correct, and reason if the simpler expressions are better for + // analysis (maybe not!) + + SymBool compute_channels_last_contiguous_3d_dim5() const; + SymBool compute_channels_last_2d_dim5() const; + SymBool compute_channels_last_3d_dim5() const; + SymBool compute_is_non_overlapping_and_dense_dim4() const; + SymBool compute_is_non_overlapping_and_dense_dim5() const; + SymBool compute_is_non_overlapping_and_dense_anydim() const; + + void init_numel() const; + void init_is_contiguous() const; + void init_is_channels_last_contiguous() const; + void init_is_channels_last_3d_contiguous() const; + void init_is_channels_last() const; + void init_is_channels_last_3d() const; + void init_is_non_overlapping_and_dense() const; + + // NOTE: These only set if !has_foo() + void set_numel(SymInt val) const; + void set_is_contiguous(SymBool val) const; + void set_is_channels_last_contiguous(SymBool val) const; + void set_is_channels_last_3d_contiguous(SymBool val) const; + void set_is_channels_last(SymBool val) const; + void set_is_channels_last_3d(SymBool val) const; + void set_is_non_overlapping_and_dense(SymBool val) const; + + // Lazily initialized variables, with the corresponding available_ flag + // indicating whether the value has been initialized + mutable std::atomic available_{0}; + enum avail { + numel_avail = 1 << 0, + is_contiguous_avail = 1 << 1, + is_channels_last_contiguous_avail = 1 << 2, + is_channels_last_3d_contiguous_avail = 1 << 3, + is_channels_last_avail = 1 << 4, + is_channels_last_3d_avail = 1 << 5, + is_non_overlapping_and_dense_avail = 1 << 6, + }; + + // Mutex to prevent races when initializing the variable from const accessors + mutable std::mutex mutables_; + mutable SymInt numel_ = 1; + mutable SymBool is_contiguous_{true}; + mutable SymBool is_channels_last_contiguous_{false}; + mutable SymBool is_channels_last_3d_contiguous_{false}; + mutable SymBool is_channels_last_{false}; + mutable SymBool is_channels_last_3d_{false}; + mutable SymBool is_non_overlapping_and_dense_{true}; +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/TensorImpl.h b/phivenv/Lib/site-packages/torch/include/c10/core/TensorImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..9b68951a34fd1b99f4e41b72a6de6a24dc033fb7 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/TensorImpl.h @@ -0,0 +1,3260 @@ +#pragma once + +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// A global boolean variable to control whether we free memory when a Tensor +// is shrunk to a smaller size. As a result, a Tensor is always going to +// keep the memory allocated for its maximum capacity reshaped to so far. +// +// This parameter is respected "upper-case" methods which call Resize() +// (e.g., CopyFrom, ResizeLike); it is NOT respected by Tensor::resize_ +// or ShrinkTo, both of which guarantee to never to free memory. +C10_DECLARE_bool(caffe2_keep_on_shrink); + +// Since we can have high variance in blob memory allocated across different +// inputs in the same run, we will shrink the blob only if the memory gain +// is larger than this flag in bytes. This only applies to functions which +// respect caffe2_keep_on_shrink. +C10_DECLARE_int64(caffe2_max_keep_on_shrink_memory); + +namespace at { +class Tensor; +class TensorBase; +} // namespace at + +namespace c10 { + +/** + * A utility function to convert vector to vector. + */ +inline std::vector ToVectorint64_t(const ArrayRef& src) { + return std::vector(src.begin(), src.end()); +} + +/** + * Return product of all dimensions starting from k + */ +inline int64_t size_from_dim_(int k, IntArrayRef dims) { + int64_t r = 1; + for (const auto i : c10::irange(k, dims.size())) { + r *= dims[i]; + } + return r; +} + +// Product of all dims up to k (not including dims[k]) +inline int64_t size_to_dim_(int k, IntArrayRef dims) { + TORCH_CHECK(k >= 0 && static_cast(k) <= dims.size()); + int64_t r = 1; + for (const auto i : c10::irange(k)) { + r *= dims[i]; + } + return r; +} + +// Product of all dims between k and l (not including dims[k] and dims[l]) +inline int64_t size_between_dim_(int k, int l, IntArrayRef dims) { + TORCH_CHECK((unsigned)l < dims.size() && (unsigned)k < dims.size()); + int64_t r = 1; + if (k < l) { + for (int i = k + 1; i < l; ++i) { + r *= dims[i]; + } + } else { + for (int i = l + 1; i < k; ++i) { + r *= dims[i]; + } + } + return r; +} + +// Wrap around axis_index if it is negative, s.t., -1 is the last dim +inline int canonical_axis_index_(int axis_index, int ndims) { + TORCH_CHECK(axis_index >= -ndims); + TORCH_CHECK(axis_index < ndims); + if (axis_index < 0) { + return axis_index + ndims; + } + return axis_index; +} + +using PlacementDtor = void (*)(void*, size_t); + +/* + * A Context that will call extra placement deleter during + * deconstruction. + * + * Accept a already constructed DataPtr and store it as member + * during destruction, we'll call extra deleter on the underlying + * data pointer before the DataPtr is destructed. + * `data_ptr_` owns the memory. 
+ */
+struct C10_API PlacementDeleteContext {
+  DataPtr data_ptr_;
+  PlacementDtor placement_dtor_;
+  size_t size_;
+
+  PlacementDeleteContext(
+      DataPtr&& data_ptr,
+      PlacementDtor placement_dtor,
+      size_t size)
+      : data_ptr_(std::move(data_ptr)),
+        placement_dtor_(placement_dtor),
+        size_(size) {}
+
+  PlacementDeleteContext(PlacementDeleteContext&&) noexcept = delete;
+  PlacementDeleteContext(const PlacementDeleteContext&) = delete;
+  PlacementDeleteContext& operator=(const PlacementDeleteContext&) = delete;
+  PlacementDeleteContext& operator=(PlacementDeleteContext&&) = delete;
+  static DataPtr makeDataPtr(
+      DataPtr&& data_ptr,
+      PlacementDtor placement_dtor,
+      size_t size,
+      Device device);
+  ~PlacementDeleteContext() {
+    placement_dtor_(data_ptr_.get(), size_);
+    // original memory will be freed when data_ptr_ is destructed
+  }
+};
+
+struct C10_API AutogradMetaInterface {
+  virtual void set_requires_grad(
+      bool requires_grad,
+      at::TensorImpl* self_impl) = 0;
+  virtual bool requires_grad() const = 0;
+  virtual at::Tensor& mutable_grad() = 0;
+  virtual const at::Tensor& grad() const = 0;
+  virtual const at::Tensor& fw_grad(uint64_t level, const at::TensorBase& self)
+      const = 0;
+  virtual void set_fw_grad(
+      const at::TensorBase& new_grad,
+      const at::TensorBase& self,
+      uint64_t level,
+      bool is_inplace_op) = 0;
+  virtual ~AutogradMetaInterface();
+};
+
+namespace impl {
+
+// Unfortunately, the definition of AutogradMeta lives in a separate
+// compilation unit than TensorImpl (libtorch.so versus libc10.so)
+// which means that we cannot construct an AutogradMeta from TensorImpl,
+// not even from the cpp file. So we have to indirect it through a factory
+// function which will be initialized when we load libtorch.so.
+
+struct C10_API AutogradMetaFactory {
+  virtual ~AutogradMetaFactory() = default;
+  virtual std::unique_ptr<AutogradMetaInterface> make() const = 0;
+  // This method is the dumbest method. But I don't have access
+  // to Tensor (not TensorImpl) which is undefined in this header.
+  virtual const at::Tensor& undefined_tensor() const = 0;
+};
+
+C10_API void SetAutogradMetaFactory(AutogradMetaFactory* factory);
+C10_API AutogradMetaFactory* GetAutogradMetaFactory();
+
+struct C10_API AutogradMetaFactoryRegisterer {
+  explicit AutogradMetaFactoryRegisterer(AutogradMetaFactory* factory) {
+    SetAutogradMetaFactory(factory);
+  }
+};
+
+} // namespace impl
+
+struct C10_API NamedTensorMetaInterface {
+  virtual ~NamedTensorMetaInterface() = default;
+  virtual std::unique_ptr<NamedTensorMetaInterface> clone() const {
+    TORCH_INTERNAL_ASSERT(
+        false, "Not implemented: NamedTensorMetaInterface::clone");
+  }
+  virtual int64_t slow_dim() const {
+    TORCH_INTERNAL_ASSERT(
+        false, "Not implemented: NamedTensorMetaInterface::slow_dim");
+  }
+};
+
+// For ease of copy pasting
+#if 0
+is_contiguous
+is_channels_last_contiguous
+is_channels_last_3d_contiguous
+is_channels_last
+is_channels_last_3d
+is_non_overlapping_and_dense
+#endif
+
+/**
+ * This structure is intended to hold additional metadata of the specific device
+ * backend.
+ **/ +struct C10_API BackendMeta : intrusive_ptr_target { + ~BackendMeta() override = default; + virtual intrusive_ptr clone( + const intrusive_ptr& ptr) const { + return ptr; + } +}; + +struct C10_API ExtraMeta { + std::unique_ptr symbolic_shape_meta_ = nullptr; + std::unique_ptr named_tensor_meta_ = nullptr; + intrusive_ptr backend_meta_ = nullptr; + std::optional custom_data_ptr_error_msg_ = std::nullopt; + std::optional custom_storage_error_msg_ = std::nullopt; + + ExtraMeta() = default; + ~ExtraMeta() = default; + ExtraMeta(const ExtraMeta& other) { + if (other.symbolic_shape_meta_) { + symbolic_shape_meta_ = + std::make_unique(*other.symbolic_shape_meta_); + } + if (other.named_tensor_meta_) { + named_tensor_meta_ = other.named_tensor_meta_->clone(); + } + if (other.backend_meta_) { + backend_meta_ = other.backend_meta_->clone(other.backend_meta_); + } + if (other.custom_data_ptr_error_msg_) { + custom_data_ptr_error_msg_ = other.custom_data_ptr_error_msg_; + } + if (other.custom_storage_error_msg_) { + custom_storage_error_msg_ = other.custom_storage_error_msg_; + } + } + ExtraMeta& operator=(const ExtraMeta& other) = delete; + ExtraMeta(ExtraMeta&& other) = delete; + ExtraMeta& operator=(ExtraMeta&& other) = delete; + + ExtraMeta( + std::unique_ptr symbolic_shape_meta, + std::unique_ptr named_tensor_meta, + intrusive_ptr backend_meta, + std::optional custom_data_ptr_error_msg = std::nullopt, + std::optional custom_storage_access_error_msg = std::nullopt) + : symbolic_shape_meta_(std::move(symbolic_shape_meta)), + named_tensor_meta_(std::move(named_tensor_meta)), + backend_meta_(std::move(backend_meta)), + custom_data_ptr_error_msg_(std::move(custom_data_ptr_error_msg)), + custom_storage_error_msg_(std::move(custom_storage_access_error_msg)) {} + + std::unique_ptr clone() const { + return std::make_unique(*this); + } +}; + +// NOTE [ Version Counter Sharing ] +// +// Every Tensor has a version counter. Version counters are incremented whenever +// the data or size of a tensor changes through in-place Variable operations. +// Version counters are used to detect modifications to saved variables which +// would result in incorrect gradient calculations. Version counters may be +// shared between Variables: +// +// 1. A view shares the version counter of the base Variable, +// 2. `x.detach()` shares the version counter of `x`, +// 3. Unpacked saved variables share the version counter of the source. +// +// Version counters are not shared in these scenarios: +// +// 1. When we replace a `Variable`'s underlying `Tensor` by calling +// `set_data(...)`, +// 2. `x.data` does not share the version counter of `x`. (See discussion at +// https://github.com/pytorch/pytorch/issues/5396) +// +// Question: Why do we put the version counter in TensorImpl instead of +// AutogradMeta? +// +// Answer: After the Variable/Tensor merge, a tensor will not have AutogradMeta +// when its `requires_grad_` is false, but when we use this tensor in the +// forward pass of a function that requires saving this tensor for backward, we +// need to keep track of this tensor's version to make sure it's always valid in +// the autograd graph. +// +// To achieve this goal, we put the version counter in TensorImpl instead of +// AutogradMeta, and have it always be available. This allows us to have the +// optimization of not carrying AutogradMeta when a tensor doesn't require +// gradient. 
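+//
+// An illustrative ATen-level sketch of the behavior this enables:
+//
+//   at::Tensor base = at::ones({3});
+//   at::Tensor view = base.slice(0, 1); // view shares base's version counter
+//   base.add_(1);                       // in-place op bumps the shared counter
+//   // Had `view` been saved for backward before the bump, autograd would
+//   // detect the version mismatch and error out instead of silently
+//   // computing incorrect gradients.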
+// +// A hypothetical alternative way to achieve this goal is to initialize +// AutogradMeta and create the version counter for the non-requires-grad tensor +// only when it's saved for backward. However, since saving a tensor for +// backward happens in the forward pass, and our invariant is that forward pass +// needs to be thread-safe, lazy-initializing AutogradMeta when saving a tensor +// can introduce race conditions when we are running the forward pass in +// multi-thread scenarios, thus making the forward pass not thread-safe anymore, +// which breaks the invariant. +struct C10_API VariableVersion { + private: + struct VersionCounter : intrusive_ptr_target { + VersionCounter(uint32_t version) : version_(version) {} + std::atomic version_; + }; + c10::intrusive_ptr version_counter_; + + public: + // Note [Disabled VariableVersion] + // VariableVersion struct has an intrusive_ptr pointing VersionCounter struct + // with an atomic variable. Thus `VariableVersion(/*version=*/0)` is not as + // cheap as we expected. In some cases constructing a VariableVersion with + // version 0 is not necessary so we add a cheap constructor which + // doesn't allocate the intrusive_ptr. + // Example use cases are: + // - Inference tensors don't track version counter, so they'll just always + // have disabled VariableVersion. + // - In SavedVariable class we override version_counter_ inside its + // constructor + // so that we can use the cheap constructor there. + enum Disabled { DISABLED }; + // It's okay to return true even for inference tensor which + // doesn't have version counter enabled. + // We want to be permissive here since in many cases (e.g. make_variable) + // we can std::move a TensorImpl if there's no other uses which saves us + // an additional TensorImpl allocation. + bool unique() const { + return version_counter_ ? 1 == version_counter_.use_count() : true; + } + // NOTE: As of C++11 and 14, default-constructing a std::atomic variable + // leaves it in a persistently undefined state. See + // https://cplusplus.github.io/LWG/issue2334. + VariableVersion(uint32_t version) + : version_counter_(c10::make_intrusive(version)) {} + VariableVersion(Disabled = DISABLED) {} + + bool enabled() const { + return version_counter_; + } + + // Note [Inplace update inference tensor] + // 1. Inplace update to inference tensor is forbidden in normal mode. + // For example: + // inference_tensor.copy_(normal_tensor_requires_grad) + // This inplace makes inference_tensor have requires_grad=True and + // have a grad_fn. This is bad because views of `inference_tensor` + // created in InferenceMode won't be able to know the grad_fn since + // their ViewMeta were not recorded. To match NoGradMode behavior + // that "inplace update to a view created in NoGradMode raise an error", + // we just ban inplace update to inference tensor since we can't tell + // if an inference tensor is a view created in InferenceMode. + // + // Note that views of normal tensor created in InferenceMode has proper + // ViewMeta so that they're aware of the grad_fn correctly. + // + // 2. Inplace update to inference tensor in inference tensor doesn't bump + // version counter. + // * It either doesn't call bump() by skipping ADInplaceOrView kernel, + // - e.g. inference_tensor.add_(1) + // * or bump() is a no-op for inference tensor. + // - e.g. inference_tensor.add_(normal_tensor) + void bump() { + // TODO: Replace the link to the documentation once it's available. 
+ TORCH_CHECK( + version_counter_ || InferenceMode::is_enabled(), + "Inplace update to inference tensor outside InferenceMode is not allowed." + "You can make a clone to get a normal tensor before doing inplace update." + "See https://github.com/pytorch/rfcs/pull/17 for more details."); + if (version_counter_) { + ++version_counter_->version_; + } + } + + void set_version(int64_t i) { + TORCH_CHECK( + version_counter_, + "Tried to call torch.autograd._unsafe_set_version() on a tensor " + "that does not have a version counter. Was it created in inference mode?"); + TORCH_CHECK(i >= 0, "Cannot set a version_counter to a value below 0: ", i); + version_counter_->version_ = i; + } + + // Inference tensor doesn't have version counter so it shouldn't be + // accessed. + uint32_t current_version() const { + TORCH_CHECK( + version_counter_, "Inference tensors do not track version counter."); + return version_counter_->version_; + } +}; + +// Forward declaration of TensorImpl needed for forward declaration of +// C10_TensorImpl_Size_Check_Dummy_Class +struct C10_API TensorImpl; + +/** + * NOTE: Some TensorImpl methods are small and not overridden in the + * PyTorch codebase itself, but may theoretically need to be + * overridden by third-party TensorImpl subclasses. This macro allows + * users that need maximum performance and don't need these extension + * points to disable them with a build-time flag. (In particular, + * XLA's XLATensorImpl currently overrides these methods, so we can't + * enable this flag by default.) + */ +#ifdef C10_DISABLE_TENSORIMPL_EXTENSIBILITY +#define TENSORIMPL_MAYBE_VIRTUAL +#else +#define TENSORIMPL_MAYBE_VIRTUAL virtual +#endif + +/** + * The low-level representation of a tensor, which contains a pointer + * to a storage (which contains the actual data) and metadata (e.g., sizes and + * strides) describing this particular view of the data as a tensor. + * + * Some basic characteristics about our in-memory representation of + * tensors: + * + * - It contains a pointer to a storage struct (Storage/StorageImpl) + * which contains the pointer to the actual data and records the + * data type and device of the view. This allows multiple tensors + * to alias the same underlying data, which allows to efficiently + * implement differing *views* on a tensor. + * + * - The tensor struct itself records view-specific metadata about + * the tensor, e.g., sizes, strides and offset into storage. + * Each view of a storage can have a different size or offset. + * + * - This class is intrusively refcounted. It is refcounted so that + * we can support prompt deallocation of large tensors; it is + * intrusively refcounted so that we can still perform reference + * counted operations on raw pointers, which is often more convenient + * when passing tensors across language boundaries. + * + * - For backwards-compatibility reasons, a tensor may be in an + * uninitialized state. A tensor may be uninitialized in the following + * two ways: + * + * - A tensor may be DTYPE UNINITIALIZED. A tensor of this + * form has an uninitialized dtype. This situation most + * frequently arises when a user writes Tensor x(CPU). The dtype + * is subsequently initialized when mutable_data() is + * invoked for the first time. + * + * - A tensor may be STORAGE UNINITIALIZED. A tensor of this form + * has non-zero size, but has a storage with a null data pointer. + * This situation most frequently arises when a user calls + * Resize() or FreeMemory(). 
This is because Caffe2 historically + * does lazy allocation: allocation of data doesn't occur until + * mutable_data() is invoked. A tensor with zero size is + * always storage initialized, because no allocation is necessary + * in this case. + * + * All combinations of these two uninitialized states are possible. + * Consider the following transcript in idiomatic Caffe2 API: + * + * Tensor x(CPU); // x is storage-initialized, dtype-UNINITIALIZED + * x.Resize(4); // x is storage-UNINITIALIZED, dtype-UNINITIALIZED + * x.mutable_data(); // x is storage-initialized, dtype-initialized + * x.FreeMemory(); // x is storage-UNINITIALIZED, dtype-initialized. + * + * All other fields on tensor are always initialized. In particular, + * size is always valid. (Historically, a tensor declared as Tensor x(CPU) + * also had uninitialized size, encoded as numel == -1, but we have now + * decided to default to zero size, resulting in numel == 0). + * + * Uninitialized storages MUST be uniquely owned, to keep our model + * simple. Thus, we will reject operations which could cause an + * uninitialized storage to become shared (or a shared storage to + * become uninitialized, e.g., from FreeMemory). + * + * In practice, tensors which are storage-UNINITIALIZED and + * dtype-UNINITIALIZED are *extremely* ephemeral: essentially, + * after you do a Resize(), you basically always call mutable_data() + * immediately afterwards. Most functions are not designed to + * work if given a storage-UNINITIALIZED, dtype-UNINITIALIZED tensor. + * + * We intend to eliminate all uninitialized states, so that every + * tensor is fully initialized in all fields. Please do not write new code + * that depends on these uninitialized states. + */ +struct C10_API TensorImpl : public c10::intrusive_ptr_target { + TensorImpl() = delete; + ~TensorImpl() override; + // Note [Enum ImplType] + // This enum is temporary. In the followup refactor we should + // think about how to specialize TensorImpl creation for view + // tensors. Currently we only special case its key_set_ but + // there's also potential to share version_counter_ directly + // without creating first and then override in as_view. + enum ImplType { VIEW }; + + /** + * Construct a 1-dim 0-size tensor backed by the given storage. + */ + TensorImpl( + Storage&& storage, + DispatchKeySet, + const caffe2::TypeMeta data_type); + + // See Note [Enum ImplType] + TensorImpl( + ImplType, + Storage&& storage, + DispatchKeySet, + const caffe2::TypeMeta data_type); + + /** + * Construct a 1-dim 0 size tensor that doesn't have a storage. + */ + TensorImpl( + DispatchKeySet, + const caffe2::TypeMeta data_type, + std::optional device_opt); + + // Legacy constructors so I don't have to go update call sites. + // TODO: When Variable is added, delete these constructors + TensorImpl( + Storage&& storage, + DispatchKey dispatch_key, + const caffe2::TypeMeta data_type) + : TensorImpl( + std::move(storage), + DispatchKeySet(dispatch_key), + data_type) {} + TensorImpl( + DispatchKey dispatch_key, + const caffe2::TypeMeta data_type, + std::optional device_opt) + : TensorImpl(DispatchKeySet(dispatch_key), data_type, device_opt) {} + + private: + // This constructor is private, because the data_type is redundant with + // storage. Still, we pass it in separately because it's easier to write + // the initializer list if we're not worried about storage being moved out + // from under us. 
+  TensorImpl(
+      Storage&& storage,
+      DispatchKeySet,
+      const caffe2::TypeMeta data_type,
+      std::optional<c10::Device>);
+
+ public:
+  TensorImpl(const TensorImpl&) = delete;
+  TensorImpl& operator=(const TensorImpl&) = delete;
+  TensorImpl(TensorImpl&&) = delete;
+  TensorImpl& operator=(TensorImpl&&) = delete;
+
+  /**
+   * Release (decref) storage, and any other external allocations. This
+   * override is for `intrusive_ptr_target` and is used to implement weak
+   * tensors.
+   */
+  void release_resources() override;
+
+ public:
+  /**
+   * Return the DispatchKeySet corresponding to this Tensor, specifying
+   * all of the DispatchKeys that this Tensor identifies as. This is the
+   * information used to dispatch operations on this tensor.
+   */
+  DispatchKeySet key_set() const {
+    return key_set_;
+  }
+
+ private:
+  [[noreturn]] void throw_cannot_call_with_symbolic(const char* meth) const;
+
+  // NOTE: The general recipe for customizable methods is that the fastpath
+  // function (e.g., sizes()) does an unlikely policy test, and if it doesn't
+  // trigger, it does the fast path implementation with no checks, going
+  // directly to on-TensorImpl fields. In particular, you never need to
+  // check ExtraMeta if the policy doesn't trigger, as non-trivial ExtraMeta
+  // implies the policy will always match.
+  //
+  // The default implementations of methods are "safe": they do extra tests
+  // to make sure the internal state is consistent no matter if you are
+  // doing symbolic shapes or not. If you don't want the tests, directly
+  // override the custom method (e.g., custom_sizes()) to do your preferred
+  // behavior.
+
+ public:
+  /**
+   * Return a reference to the sizes of this tensor. This reference remains
+   * valid as long as the tensor is live and not resized.
+   */
+  IntArrayRef sizes() const {
+    if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) {
+      return sizes_custom();
+    }
+    return sizes_and_strides_.sizes_arrayref();
+  }
+
+  SymIntArrayRef sym_sizes() const {
+    if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) {
+      return sym_sizes_custom();
+    }
+    // Sizes guaranteed to be non-negative, so unchecked cast is OK
+    return c10::fromIntArrayRefKnownNonNegative(
+        sizes_and_strides_.sizes_arrayref());
+  }
+
+  IntArrayRef sizes_default() const {
+    if (C10_UNLIKELY(has_symbolic_sizes_strides_)) {
+      throw_cannot_call_with_symbolic("sizes");
+    }
+    return sizes_and_strides_.sizes_arrayref();
+  }
+
+  SymIntArrayRef sym_sizes_default() const {
+    if (has_symbolic_sizes_strides_) {
+      return symbolic_shape_meta().sizes_;
+    } else {
+      // Sizes guaranteed to be non-negative, so unchecked cast is OK
+      return c10::fromIntArrayRefKnownNonNegative(sizes_default());
+    }
+  }
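+
+  // (Editorial sketch, not upstream code: a subclass opting into the recipe
+  // above would enable SizesStridesPolicy::CustomSizes and override the
+  // *_custom() hooks, e.g.
+  //
+  //   struct MyCustomSizesImpl : public c10::TensorImpl {
+  //     IntArrayRef sizes_custom() const override {
+  //       return my_sizes_; // hypothetical member
+  //     }
+  //   };
+  //
+  // so the sizes() fastpath above routes through sizes_custom() only when
+  // the policy byte says so; plain dense tensors pay just one predicted
+  // branch. `MyCustomSizesImpl`/`my_sizes_` are names for illustration.)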
+  // From https://stackoverflow.com/a/3057522/23845
+  // TODO: does C++14 have a stdlib template for this?
+  template <typename T>
+  struct identity {
+    typedef T type;
+  };
+
+  template <typename T>
+  ArrayRef<T> generic_sizes() {
+    return _generic_sizes(identity<T>());
+  }
+
+  ArrayRef<int64_t> _generic_sizes(identity<int64_t>) {
+    return sizes();
+  }
+  ArrayRef<c10::SymInt> _generic_sizes(identity<c10::SymInt>) {
+    return sym_sizes();
+  }
+
+  template <typename T>
+  ArrayRef<T> generic_strides() {
+    return _generic_strides(identity<T>());
+  }
+
+  ArrayRef<int64_t> _generic_strides(identity<int64_t>) {
+    return strides();
+  }
+  ArrayRef<c10::SymInt> _generic_strides(identity<c10::SymInt>) {
+    return sym_strides();
+  }
+
+  template <typename T>
+  T generic_storage_offset() {
+    return _generic_storage_offset(identity<T>());
+  }
+
+  int64_t _generic_storage_offset(identity<int64_t>) {
+    return storage_offset();
+  }
+  c10::SymInt _generic_storage_offset(identity<c10::SymInt>) {
+    return sym_storage_offset();
+  }
+
+  /**
+   * The number of elements in a tensor.
+   *
+   * WARNING: Previously, if you were using the Caffe2 API, you could
+   * test numel() == -1 to see if a tensor was uninitialized. This
+   * is no longer true; numel always accurately reports the product
+   * of sizes of a tensor.
+   */
+  int64_t numel() const {
+    if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) {
+      return numel_custom();
+    }
+    return numel_;
+  }
+
+  c10::SymInt sym_numel() const {
+    if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) {
+      return sym_numel_custom();
+    }
+    return c10::SymInt(SymInt::UNCHECKED, numel_);
+  }
+
+  int64_t numel_default() const {
+    if (C10_UNLIKELY(has_symbolic_sizes_strides_)) {
+      throw_cannot_call_with_symbolic("numel");
+    }
+    return numel_;
+  }
+
+  c10::SymInt sym_numel_default() const {
+    if (has_symbolic_sizes_strides_) {
+      return symbolic_shape_meta().numel();
+    } else {
+      return c10::SymInt(SymInt::UNCHECKED, numel_);
+    }
+  }
+
+  /**
+   * Return the number of dimensions of this tensor. Note that 0-dimension
+   * represents a Tensor that is a Scalar, e.g., one that has a single element.
+   */
+  int64_t dim() const {
+    if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) {
+      return dim_custom();
+    }
+    return static_cast<int64_t>(sizes_and_strides_.size());
+  }
+
+  int64_t dim_default() const {
+    if (has_symbolic_sizes_strides_) {
+      return static_cast<int64_t>(symbolic_shape_meta().sizes_.size());
+    } else {
+      return static_cast<int64_t>(sizes_and_strides_.size());
+    }
+  }
+
+  /**
+   * Return the offset in number of elements into the storage that this
+   * tensor points to. Most tensors have storage_offset() == 0, but,
+   * for example, an index into a tensor will have a non-zero storage_offset().
+   *
+   * WARNING: This is NOT computed in bytes.
+   */
+  int64_t storage_offset() const {
+    // TODO: maybe this should be toggled by strides
+    if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) {
+      return storage_offset_custom();
+    }
+    return storage_offset_;
+  }
+
+  c10::SymInt sym_storage_offset() const {
+    if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) {
+      return sym_storage_offset_custom();
+    }
+    return c10::SymInt(SymInt::UNCHECKED, storage_offset_);
+  }
+
+  int64_t storage_offset_default() const {
+    if (C10_UNLIKELY(has_symbolic_sizes_strides_)) {
+      throw_cannot_call_with_symbolic("storage_offset");
+    }
+    return storage_offset_;
+  }
+
+  c10::SymInt sym_storage_offset_default() const {
+    if (has_symbolic_sizes_strides_) {
+      return symbolic_shape_meta().storage_offset_;
+    } else {
+      return c10::SymInt(SymInt::UNCHECKED, storage_offset_);
+    }
+  }
+
+  /**
+   * Return a reference to the strides of this tensor. This reference remains
+   * valid as long as the tensor is live and not restrided.
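+   *
+   * (Editorial note: like sizes(), the returned ArrayRef aliases metadata
+   * owned by this TensorImpl, so a pattern such as
+   *
+   *   auto st = impl->strides();
+   *   impl->set_sizes_contiguous(new_size);  // st may now dangle
+   *
+   * assumes no metadata mutation between fetch and use; `impl` and
+   * `new_size` are hypothetical names for illustration.)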
+   */
+  IntArrayRef strides() const {
+    if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomStrides))) {
+      return strides_custom();
+    }
+    return sizes_and_strides_.strides_arrayref();
+  }
+
+  c10::SymIntArrayRef sym_strides() const {
+    if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomStrides))) {
+      return sym_strides_custom();
+    }
+    return c10::fromIntArrayRefKnownNonNegative(strides_default());
+  }
+
+  IntArrayRef strides_default() const {
+    if (C10_UNLIKELY(has_symbolic_sizes_strides_)) {
+      throw_cannot_call_with_symbolic("strides");
+    }
+    return sizes_and_strides_.strides_arrayref();
+  }
+
+  c10::SymIntArrayRef sym_strides_default() const {
+    if (has_symbolic_sizes_strides_) {
+      return symbolic_shape_meta().strides_;
+    } else {
+      return c10::fromIntArrayRefKnownNonNegative(strides_default());
+    }
+  }
+
+  /**
+   * Whether or not a tensor is laid out in contiguous memory.
+   *
+   * Tensors with non-trivial strides are not contiguous. See
+   * compute_contiguous() for the exact definition of whether or not
+   * a tensor is contiguous or not.
+   */
+  bool is_contiguous(
+      at::MemoryFormat memory_format = at::MemoryFormat::Contiguous) const {
+    if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomStrides))) {
+      return is_contiguous_custom(memory_format);
+    }
+    return is_contiguous_default(memory_format);
+  }
+
+  // These are factored into separate functions in case subclasses
+  // want to use them
+  bool is_contiguous_default(at::MemoryFormat memory_format) const {
+    if (has_symbolic_sizes_strides_) {
+      if (memory_format == at::MemoryFormat::ChannelsLast) {
+        return symbolic_shape_meta().is_channels_last_contiguous().guard_bool(
+            __FILE__, __LINE__);
+      } else if (memory_format == at::MemoryFormat::ChannelsLast3d) {
+        return symbolic_shape_meta()
+            .is_channels_last_3d_contiguous()
+            .guard_bool(__FILE__, __LINE__);
+      }
+      return symbolic_shape_meta().is_contiguous().guard_bool(
+          __FILE__, __LINE__);
+    }
+
+    if (memory_format == at::MemoryFormat::ChannelsLast) {
+      return is_channels_last_contiguous_;
+    } else if (memory_format == at::MemoryFormat::ChannelsLast3d) {
+      return is_channels_last_3d_contiguous_;
+    }
+    return is_contiguous_;
+  }
+
+  bool is_strides_like_default(at::MemoryFormat memory_format) const {
+    if (has_symbolic_sizes_strides_) {
+      if (memory_format == at::MemoryFormat::ChannelsLast) {
+        return symbolic_shape_meta().is_channels_last().guard_bool(
+            __FILE__, __LINE__);
+      } else if (memory_format == at::MemoryFormat::ChannelsLast3d) {
+        return symbolic_shape_meta().is_channels_last_3d().guard_bool(
+            __FILE__, __LINE__);
+      } else {
+        return false;
+      }
+    }
+
+    if (memory_format == at::MemoryFormat::ChannelsLast) {
+      return is_channels_last_;
+    } else if (memory_format == at::MemoryFormat::ChannelsLast3d) {
+      return is_channels_last_3d_;
+    } else {
+      return false;
+    }
+  }
+
+  bool is_non_overlapping_and_dense_default() const {
+    if (has_symbolic_sizes_strides_) {
+      return symbolic_shape_meta().is_non_overlapping_and_dense().guard_bool(
+          __FILE__, __LINE__);
+    } else {
+      return is_non_overlapping_and_dense_;
+    }
+  }
+
+  // NB: these dim accessor functions don't have _default(), as you can use
+  // sizes_default/strides_default
+  /**
+   * Return the size of a tensor at some dimension, wrapping the dimension if
+   * necessary.
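+   *
+   * (Editorial example: for a rank-4 tensor, size(-1) and size(3) name the
+   * same dimension; negative indices work because of the maybe_wrap_dim
+   * call in the body below.)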
+   *
+   * NOTE: if you know wrapping is unnecessary, do sizes()[d] instead; it will
+   * be faster
+   */
+  int64_t size(int64_t d) const {
+    if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) {
+      return size_custom(d);
+    }
+    d = maybe_wrap_dim(d, dim(), /*wrap_scalar=*/false);
+    return sizes_and_strides_.size_at_unchecked(d);
+  }
+
+  c10::SymInt sym_size(int64_t d) const {
+    if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomSizes))) {
+      return sym_size_custom(d);
+    }
+    d = maybe_wrap_dim(d, dim(), /*wrap_scalar=*/false);
+    const auto sizes = this->sym_sizes();
+    return sizes[d];
+  }
+
+  /**
+   * Return the stride of a tensor at some dimension, wrapping the dimension
+   * if necessary.
+   *
+   * NOTE: if you know wrapping is unnecessary, do strides()[d] instead; it
+   * will be faster
+   */
+  int64_t stride(int64_t d) const {
+    d = maybe_wrap_dim(d, dim(), false);
+    if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomStrides))) {
+      // TODO: provide stride_custom, symmetrically with size_custom.
+      // There is presently no user for it; only NestedTensor is using
+      // size_custom overrideability
+      return strides_custom()[d]; // unchecked (maybe_wrap_dim enforces bounds)
+    }
+    // Intentionally don't call default, which also handles symbolic
+    return sizes_and_strides_.stride_at_unchecked(d);
+  }
+
+  enum class SizesStridesPolicy : uint8_t {
+    // Default behavior, e.g., dense tensor.
+    //
+    // Can override: nothing
+    Default = 0,
+    // Customizable strides behavior, e.g., sparse tensor,
+    // mkldnn tensor.
+    //
+    // Can override: strides(), is_contiguous()
+    CustomStrides = 1,
+    // Customizable sizes behavior, e.g., nested tensor
+    //
+    // Can override: strides(), is_contiguous(), sizes(), dim(), numel()
+    CustomSizes = 2
+  };
+
+ protected:
+  inline bool matches_policy(SizesStridesPolicy policy) const {
+    return sizes_strides_policy_ >= static_cast<uint8_t>(policy);
+  }
+
+  inline bool matches_custom(SizesStridesPolicy policy) const {
+    return custom_sizes_strides_ >= static_cast<uint8_t>(policy);
+  }
+
+  inline bool matches_python_custom(SizesStridesPolicy policy) const {
+    auto r = python_custom_sizes_strides_ >= static_cast<uint8_t>(policy);
+    if (r) {
+      TORCH_INTERNAL_ASSERT(is_python_dispatch())
+    }
+    return r;
+  }
+
+  /**
+   * Customization points for the functions above. sizes_strides_policy_
+   * must be set to enable these.
+   *
+   * NB: dim is overridable separately from sizes because it is possible
+   * for a tensor to have rank, but not well defined sizes.
+   */
+  // sizes_strides_policy_ >= CustomStrides
+  virtual bool is_contiguous_custom(at::MemoryFormat memory_format) const;
+  virtual bool is_strides_like_custom(at::MemoryFormat memory_format) const;
+  virtual bool is_non_overlapping_and_dense_custom() const;
+  // sizes_strides_policy_ >= CustomSizes
+  // Currently this method only exists to be overwritten by subclasses such as
+  // NestedTensorImpl.
+  virtual int64_t size_custom(int64_t d) const {
+    // TODO: We could add support to Python dispatch here.
+    // TODO: We could call into aten::size.int instead of
+    // sizes_custom()[d] and enable use of the dispatcher.
+    d = maybe_wrap_dim(d, dim(), /*wrap_scalar=*/false);
+    return sizes_custom()[d]; // unchecked (maybe_wrap_dim enforces bounds)
+  }
+
+  virtual c10::SymInt sym_size_custom(int64_t d) const {
+    // TODO: We could add support to Python dispatch here.
+    // TODO: We could call into aten::size.int instead of
+    // sym_sizes_custom()[d] and enable use of the dispatcher.
+    d = maybe_wrap_dim(d, dim(), /*wrap_scalar=*/false);
+    return sym_sizes_custom()[d]; // unchecked (maybe_wrap_dim enforces bounds)
+  }
+
+  virtual IntArrayRef sizes_custom() const;
+  virtual IntArrayRef strides_custom() const;
+  virtual int64_t numel_custom() const;
+  virtual int64_t storage_offset_custom() const;
+  virtual int64_t dim_custom() const;
+  virtual Device device_custom() const;
+  virtual Layout layout_custom() const;
+
+  virtual c10::SymIntArrayRef sym_sizes_custom() const;
+  virtual c10::SymIntArrayRef sym_strides_custom() const;
+  virtual c10::SymInt sym_numel_custom() const;
+  virtual c10::SymInt sym_storage_offset_custom() const;
+
+ public:
+  /**
+   * True if this tensor has storage. See storage() for details.
+   */
+#ifdef DEBUG
+  // Allow subclasses to check that their storage_ is never getting set in
+  // debug builds.
+  virtual
+#else
+  TENSORIMPL_MAYBE_VIRTUAL
+#endif
+      bool
+      has_storage() const
+  // NOTE: we devirtualize this because it arguably shouldn't be an
+  // error just to ask subclasses if they have storage.
+  // This used to throw for most subclasses, but OpaqueTensorImpl
+  // wanted it to successfully return false, so we went ahead and made
+  // it a non-error.
+#ifdef C10_DISABLE_TENSORIMPL_EXTENSIBILITY
+  {
+    return storage_;
+  }
+#else
+      ;
+#endif
+
+  /**
+   * Return the underlying storage of a Tensor. Multiple tensors may share
+   * a single storage. A Storage is an impoverished, Tensor-like class
+   * which supports far fewer operations than Tensor.
+   *
+   * Avoid using this method if possible; try to use only Tensor APIs to
+   * perform operations.
+   */
+  TENSORIMPL_MAYBE_VIRTUAL const Storage& storage() const {
+    if (C10_UNLIKELY(storage_access_should_throw_)) {
+      throw_storage_access_error();
+    }
+    return storage_;
+  }
+
+  /**
+   * Return the underlying storage, unsafely assuming this is a basic strided
+   * tensor. In cases where `storage` access would throw, this returns a
+   * default-constructed Storage.
+   */
+  inline const Storage& unsafe_storage() const {
+    return storage_;
+  }
+
+  bool unique_version() const {
+    return version_counter_.unique();
+  }
+
+ protected:
+  virtual Layout layout_impl() const {
+    TORCH_CHECK(
+        false, "layout_impl is only implemented for TensorImpl subclasses.");
+  }
+
+ public:
+  // Whether a tensor is sparse COO or not.
+  bool is_sparse() const {
+    // NB: This method is not virtual and avoids dispatches for performance
+    // reasons.
+    return key_set_.has_all(c10::sparse_ks);
+  }
+
+  // Whether a tensor is sparse CSR or not.
+  bool is_sparse_csr() const {
+    return layout() == kSparseCsr;
+  }
+
+  // Whether a tensor is sparse CSR/CSC/BSR/BSC or not.
+  bool is_sparse_compressed() const {
+    return key_set_.has_all(c10::sparse_csr_ks);
+  }
+
+  bool is_quantized() const {
+    // NB: This method is not virtual and avoids dispatches for performance
+    // reasons.
+    constexpr auto quantized_ks = DispatchKeySet(DispatchKey::Quantized);
+    return key_set_.has_all(quantized_ks);
+  }
+
+  bool is_meta() const {
+    // NB: This method is not virtual and avoids dispatches for performance
+    // reasons.
+    if (C10_UNLIKELY(device_policy_)) {
+      return device_custom().is_meta();
+    }
+    return device_opt_.has_value() && device_opt_->type() == kMeta;
+  }
+
+  bool is_cpu() const {
+    // NB: This method is not virtual and avoids dispatches for performance
+    // reasons.
+    if (C10_UNLIKELY(device_policy_)) {
+      return device_custom().is_cpu();
+    }
+    // Note: we cannot rely on dispatch keys to determine the device type
+    // of a tensor, because "wrapper" tensors (like FunctionalTensorWrapper)
+    // don't include backend dispatch keys.
+    return device_opt_.has_value() && device_opt_->type() == kCPU;
+  }
+
+  bool is_cuda() const {
+    // NB: This method is not virtual and avoids dispatches for performance
+    // reasons.
+    if (C10_UNLIKELY(device_policy_)) {
+      return device_custom().is_cuda();
+    }
+    return device_opt_.has_value() && device_opt_->type() == kCUDA;
+  }
+
+  bool is_xpu() const {
+    // NB: This method is not virtual and avoids dispatches for performance
+    // reasons.
+    if (C10_UNLIKELY(device_policy_)) {
+      return device_custom().is_xpu();
+    }
+    return device_opt_.has_value() && device_opt_->type() == kXPU;
+  }
+
+  bool is_ipu() const {
+    if (C10_UNLIKELY(device_policy_)) {
+      return device_custom().is_ipu();
+    }
+    return device_opt_.has_value() && device_opt_->type() == kIPU;
+  }
+
+  bool is_xla() const {
+    if (C10_UNLIKELY(device_policy_)) {
+      return device_custom().is_xla();
+    }
+    return device_opt_.has_value() && device_opt_->type() == kXLA;
+  }
+
+  bool is_mtia() const {
+    if (C10_UNLIKELY(device_policy_)) {
+      return device_custom().is_mtia();
+    }
+    return device_opt_.has_value() && device_opt_->type() == kMTIA;
+  }
+
+  bool is_hpu() const {
+    if (C10_UNLIKELY(device_policy_)) {
+      return device_custom().is_hpu();
+    }
+    return device_opt_.has_value() && device_opt_->type() == kHPU;
+  }
+
+  bool is_lazy() const {
+    if (C10_UNLIKELY(device_policy_)) {
+      return device_custom().is_lazy();
+    }
+    return device_opt_.has_value() && device_opt_->type() == kLazy;
+  }
+
+  bool is_hip() const {
+    // NB: This method is not virtual and avoids dispatches for performance
+    // reasons.
+    if (C10_UNLIKELY(device_policy_)) {
+      return device_custom().is_hip();
+    }
+    return device_opt_.has_value() && device_opt_->type() == kHIP;
+  }
+
+  bool is_ve() const {
+    // NB: This method is not virtual and avoids dispatches for performance
+    // reasons.
+    if (C10_UNLIKELY(device_policy_)) {
+      return device_custom().is_ve();
+    }
+    return device_opt_.has_value() && device_opt_->type() == kVE;
+  }
+
+  bool is_privateuseone() const {
+    // NB: This method is not virtual and avoids dispatches for performance
+    // reasons.
+    if (C10_UNLIKELY(device_policy_)) {
+      return device_custom().is_privateuseone();
+    }
+    return device_opt_.has_value() && device_opt_->type() == kPrivateUse1;
+  }
+
+  bool is_mkldnn() const {
+    return key_set_.has_all(c10::mkldnn_ks);
+  }
+
+  bool is_vulkan() const {
+    if (C10_UNLIKELY(device_policy_)) {
+      return device_custom().is_vulkan();
+    }
+    return device_opt_.has_value() && device_opt_->type() == kVulkan;
+  }
+
+  bool is_metal() const {
+    if (C10_UNLIKELY(device_policy_)) {
+      return device_custom().is_metal();
+    }
+    return device_opt_.has_value() && device_opt_->type() == kMetal;
+  }
+
+  bool is_mps() const {
+    if (C10_UNLIKELY(device_policy_)) {
+      return device_custom().is_mps();
+    }
+    return device_opt_.has_value() && device_opt_->type() == kMPS;
+  }
+
+  bool is_maia() const {
+    if (C10_UNLIKELY(device_policy_)) {
+      return device_custom().is_maia();
+    }
+    return device_opt_.has_value() && device_opt_->type() == kMAIA;
+  }
+
+  bool is_nested() const {
+    return key_set_.has(DispatchKey::NestedTensor);
+  }
+
+  // TODO: remove this once we don't automatically enable Autograd dispatch
+  // keys in TensorImpl constructor.
+  // DON'T USE THIS API!!
+  // It's only created for testing purposes in
+  // file aten/src/ATen/core/boxing/impl/test_helpers.h
+  void remove_autograd_key() {
+    key_set_ = key_set_ - autograd_dispatch_keyset;
+  }
+
+  // Inference tensor doesn't have autograd or ADInplaceOrView key.
+  // Invariant:
+  //   Inference tensor has version_counter_.enabled() == false
+  bool is_inference() {
+    bool no_ADInplaceOrView = !key_set_.has_any(c10::inplace_or_view_ks);
+    bool no_Autograd = !key_set_.has_any(c10::autograd_dispatch_keyset);
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
+        no_ADInplaceOrView == no_Autograd,
+        "ADInplaceOrView and Autograd keys must be on/off at the same time.");
+    return no_ADInplaceOrView && no_Autograd;
+  }
+
+  DeviceIndex get_device() const {
+    if (C10_UNLIKELY(device_policy_)) {
+      return device_custom().index();
+    }
+    return device_default().index();
+  }
+
+  Device device() const {
+    if (C10_UNLIKELY(device_policy_)) {
+      return device_custom();
+    }
+    return device_default();
+  }
+
+ protected:
+  c10::Device device_default() const {
+    TORCH_CHECK(device_opt_.has_value(), "tensor does not have a device");
+    // See NOTE [std::optional operator usage in CUDA]
+    return *device_opt_;
+  }
+
+ public:
+  Layout layout() const {
+    if (C10_UNLIKELY(layout_policy_)) {
+      return layout_custom();
+    }
+
+    // NB: This method is not virtual and avoids dispatches for perf.
+    // strided is also the most common layout type, so we check for
+    // strided case first.
+    // This keyset must also be kept in sync with the logic in
+    // is_sparse() / is_sparse_csr() / is_mkldnn()
+    constexpr auto sparse_and_sparsecsr_and_mkldnn_ks =
+        c10::sparse_ks | c10::sparse_csr_ks | c10::mkldnn_ks;
+    if (!key_set_.has_any(sparse_and_sparsecsr_and_mkldnn_ks)) {
+      return kStrided;
+    } else if (is_sparse()) {
+      return kSparse;
+    } else if (is_sparse_compressed()) {
+      // Typically, the tensor dispatch keys define the tensor layout
+      // uniquely. This allows using non-virtual layout method for
+      // better performance. However, when tensor's layout depends,
+      // say, on tensor attributes, one must use this execution path
+      // where the corresponding tensor impl class overwrites virtual
+      // layout_impl() method.
+      //
+      // TODO: implement layout() as native function/method so that
+      // __torch_dispatch__ users will be able to redefine the
+      // layout() method.
+      return layout_impl();
+    } else {
+      TORCH_INTERNAL_ASSERT(
+          is_mkldnn(), "There is an error in the layout calculation logic.");
+      return kMkldnn;
+    }
+  }
+
+  /**
+   * True if a tensor was auto-wrapped from a C++ or Python number.
+   * For example, when you write 't + 2', 2 is auto-wrapped into a Tensor
+   * with `is_wrapped_number_` set to true.
+   *
+   * Wrapped numbers do not participate in the result type computation for
+   * mixed-type operations if there are any Tensors that are not wrapped
+   * numbers. This is useful, because we want 't + 2' to work with
+   * any type of tensor, not just LongTensor (which is what integers
+   * in Python represent).
+   *
+   * Otherwise, they behave like their non-wrapped equivalents.
+   * See [Result type computation] in TensorIterator.h.
+   *
+   * Why did we opt for wrapped numbers, as opposed to just having
+   * an extra function add(Tensor, Scalar)? This helps greatly reduce
+   * the amount of code we have to write for add, when actually
+   * a Tensor-Scalar addition is really just a Tensor-Tensor
+   * addition when the RHS is 0-dim (except for promotion behavior.)
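+   *
+   * (Editorial example: for a float tensor `t`, `t + 2` wraps 2 as a 0-dim
+   * tensor flagged as a wrapped number, so the result dtype follows `t`
+   * instead of being promoted to long; roughly
+   *
+   *   auto two = at::scalar_to_tensor(2); // then set_wrapped_number(true)
+   *   auto out = t.add(two);              // out is float, not long
+   *
+   * -- a sketch of the described behavior, not the exact internal call
+   * sequence.)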
+   */
+  bool is_wrapped_number() const {
+    return is_wrapped_number_;
+  }
+
+  /**
+   * Set whether or not a tensor was auto-wrapped from a C++ or Python
+   * number. You probably don't want to call this, unless you are
+   * writing binding code.
+   */
+  void set_wrapped_number(bool value) {
+    TORCH_INTERNAL_ASSERT(dim() == 0);
+    is_wrapped_number_ = value;
+  }
+
+  /**
+   * Returns true if Tensor supports as_strided and as_strided_backward.
+   * This is used in autograd to perform inplace update on view Tensors.
+   * See Note [View + Inplace update for base tensor] and
+   * [View + Inplace update for view tensor] for details.
+   * Note this method only returns true for the XLA backend, where it
+   * simulates strided Tensors to support most view ops, but it cannot
+   * fully support the general `as_strided` case.
+   * It can be expanded as needed in the future, e.g. sparse Tensor.
+   */
+  inline bool support_as_strided() const {
+    if (is_nested()) {
+      return false;
+    }
+    if (key_set_.has(DispatchKey::Functionalize)) {
+      return false;
+    }
+    return device().supports_as_strided();
+  }
+
+  // ~~~~~ Autograd API ~~~~~
+  // Some methods below are defined in TensorImpl.cpp because Tensor is an
+  // incomplete type.
+
+  /**
+   * Set whether or not a tensor requires gradient.
+   */
+  void set_requires_grad(bool requires_grad);
+
+  /**
+   * True if a tensor requires gradient. Tensors which require gradient
+   * have history tracked for any operations performed on them, so that
+   * we can automatically differentiate back to them. A tensor that
+   * requires gradient and has no history is a "leaf" tensor, which we
+   * accumulate gradients into.
+   */
+  bool requires_grad() const;
+
+  /**
+   * Return a mutable reference to the gradient. This is conventionally
+   * used as `t.grad() = x` to set a gradient to a completely new tensor.
+   */
+  at::Tensor& mutable_grad();
+
+  /**
+   * Return the accumulated gradient of a tensor. This gradient is written
+   * into when performing backwards, when this tensor is a leaf tensor.
+   */
+  const at::Tensor& grad() const;
+
+  /**
+   * Whether or not the imaginary part of the tensor should be negated
+   */
+  inline bool is_conj() const {
+    constexpr auto conjugate_ks = DispatchKeySet(DispatchKey::Conjugate);
+    return key_set_.has_all(conjugate_ks);
+  }
+
+  /**
+   * Set whether or not to take the conjugate of the tensor (flip the
+   * imaginary bit).
+   */
+  void _set_conj(bool value) {
+    if (value) {
+      key_set_ = key_set_.add(DispatchKey::Conjugate);
+      TORCH_INTERNAL_ASSERT(isComplexType(typeMetaToScalarType(dtype())));
+    } else {
+      key_set_ = key_set_.remove(DispatchKey::Conjugate);
+    }
+  }
+
+  /**
+   * XXX: do not use, private api!
+   * Update the backend component related keys to the backend component
+   * corresponding to this device.
+   */
+  void _change_backend_component_keys(c10::Device device);
+
+  /**
+   * Whether or not the tensor is a zerotensor
+   */
+  inline bool _is_zerotensor() const {
+    constexpr auto zerotensor_ks = DispatchKeySet(DispatchKey::ZeroTensor);
+    return key_set_.has_all(zerotensor_ks);
+  }
+
+  /**
+   * Set whether or not the tensor is a zero tensor
+   */
+  void _set_zero(bool value) {
+    if (value) {
+      TORCH_INTERNAL_ASSERT(
+          false,
+          "Please call `torch._efficientzerotensor` if you want to create a tensor with no storage.");
+    } else {
+      key_set_ = key_set_.remove(DispatchKey::ZeroTensor);
+    }
+  }
+
+  /**
+   * Whether or not the tensor should be negated
+   */
+  inline bool is_neg() const {
+    constexpr auto negative_ks = DispatchKeySet(DispatchKey::Negative);
+    return key_set_.has_all(negative_ks);
+  }
+
+  /**
+   * Set whether or not to negate the tensor (flip the negative bit).
+   */
+  void _set_neg(bool value) {
+    if (value) {
+      key_set_ = key_set_.add(DispatchKey::Negative);
+    } else {
+      key_set_ = key_set_.remove(DispatchKey::Negative);
+    }
+  }
+
+  /**
+   * Return the accumulated gradient of a tensor. This gradient is computed
+   * using forward mode AD.
+   *
+   * This is an internal API that should never be used by end users.
+   *
+   * The API is as follows:
+   * - "level" specifies the level of forward AD nesting for which the
+   *   gradient should be returned. Note that since levels are not fully
+   *   supported yet, this argument should be 0. See documentation for
+   *   torch::autograd::enter_dual_level for more details about forward AD
+   *   nesting.
+   * - "self" should represent the Tensor whose forward grad is accessed. It
+   *   is required when dealing with views.
+   */
+  const at::Tensor& _fw_grad(uint64_t level, const at::TensorBase& self) const;
+
+  /**
+   * Sets the forward gradient for this Tensor.
+   * The given Tensor might not be used directly and its content will be
+   * copied.
+   *
+   * This is an internal API that should never be used by end users.
+   *
+   * The API is as follows:
+   * - "new_grad" is a Tensor containing the new value of the gradient that
+   *   should be set
+   * - "self" should represent the Tensor whose forward grad is accessed. It
+   *   is required when dealing with views.
+   * - "level" specifies the level of forward AD nesting for which the
+   *   gradient should be set. Note that since levels are not fully supported
+   *   yet, this argument should be 0. See documentation for
+   *   torch::autograd::enter_dual_level for more details about forward AD
+   *   nesting.
+   * - "is_inplace_op" is a boolean flag that tells if this gradient was
+   *   generated by an inplace operation or an out-of-place one. This allows
+   *   better error checking.
+   */
+  void _set_fw_grad(
+      const at::TensorBase& new_grad,
+      const at::TensorBase& self,
+      uint64_t level,
+      bool is_inplace_op);
+
+  /**
+   * Return a typed data pointer to the actual data which this tensor refers
+   * to. This checks that the requested type (from the template parameter)
+   * matches the internal type of the tensor.
+   *
+   * It is invalid to call data() on a dtype-uninitialized tensor, even if
+   * the size is 0.
+   *
+   * WARNING: If a tensor is not contiguous, you MUST use strides when
+   * performing index calculations to determine the location of elements in
+   * the tensor. We recommend using 'TensorAccessor' to handle this
+   * computation for you; this class is available from 'Tensor'.
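+   *
+   * (Editorial example of the strides warning: for a non-contiguous 2-D
+   * tensor, element (i, j) lives at
+   *
+   *   const float* p = impl->data_dtype_initialized<float>();
+   *   float v = p[i * impl->strides()[0] + j * impl->strides()[1]];
+   *
+   * assuming `impl` is a live, float-dtype TensorImpl*; the name is
+   * hypothetical.)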
+   */
+  template <typename T>
+  const T* data_dtype_initialized() const {
+    return data_dtype_initialized_impl<const T>(
+        [this] { return static_cast<const T*>(storage_.data()); });
+  }
+
+  /**
+   * Return a mutable typed data pointer to the actual data which this
+   * tensor refers to. This checks that the requested type (from the
+   * template parameter) matches the internal type of the tensor.
+   *
+   * It is invalid to call data() on a dtype-uninitialized tensor, even if
+   * the size is 0.
+   *
+   * WARNING: If a tensor is not contiguous, you MUST use strides when
+   * performing index calculations to determine the location of elements in
+   * the tensor. We recommend using 'TensorAccessor' to handle this
+   * computation for you; this class is available from 'Tensor'.
+   */
+  template <typename T>
+  T* mutable_data_dtype_initialized() {
+    return data_dtype_initialized_impl<T>(
+        [this] { return static_cast<T*>(storage_.mutable_data()); });
+  }
+
+ private:
+  // Shared implementation of data_dtype_initialized() and
+  // mutable_data_dtype_initialized().
+  template <typename T, typename Func>
+  T* data_dtype_initialized_impl(const Func& get_data) const {
+    TORCH_CHECK(
+        data_type_.Match<std::remove_const_t<T>>(),
+        "Tensor type mismatch, caller expects elements to be ",
+        caffe2::TypeMeta::TypeName<std::remove_const_t<T>>(),
+        ", while tensor contains ",
+        data_type_.name(),
+        ". ");
+    return data_ptr_impl_impl<T>(get_data);
+  }
+
+ public:
+  /**
+   * More efficient helper for Tensor::data_ptr<T>(). Like data<T>(), but
+   * does not do a type check. Unlike the untemplated data(), does
+   * check has_storage() and storage_initialized().
+   */
+  template <typename T>
+  inline const T* data_ptr_impl() const {
+    return data_ptr_impl_impl<const T>(
+        [this] { return static_cast<const T*>(storage_.data()); });
+  }
+
+  /**
+   * More efficient helper for Tensor::data_ptr<T>(). Like data<T>(), but
+   * does not do a type check. Unlike the untemplated data(), does
+   * check has_storage() and storage_initialized().
+   */
+  template <typename T>
+  inline T* mutable_data_ptr_impl() {
+    return data_ptr_impl_impl<T>(
+        [this] { return static_cast<T*>(storage_.mutable_data()); });
+  }
+
+ private:
+  // Shared implementation of data_ptr_impl() and
+  // mutable_data_ptr_impl().
+  template <typename T, typename Func>
+  __ubsan_ignore_pointer_overflow__ T* data_ptr_impl_impl(
+      const Func& get_data) const {
+    if (C10_UNLIKELY(!has_storage())) {
+      throw_data_ptr_access_error();
+    }
+    TORCH_CHECK(
+        storage_initialized(),
+        "The tensor has a non-zero number of elements, but its data is not allocated yet.\n"
+        "If you're using torch.compile/export/fx, it is likely that we are erroneously "
+        "tracing into a custom kernel. To fix this, please wrap the custom kernel into "
+        "an opaque custom op. Please see the following for details: "
+        "https://pytorch.org/tutorials/advanced/custom_ops_landing_page.html\n"
+        "If you're using Caffe2, Caffe2 uses a lazy allocation, so you will need to call "
+        "mutable_data() or raw_mutable_data() to actually allocate memory.");
+    // Caller does the type check.
+    // Note: storage_offset_ can be non-null even for zero-elements tensors
+    // (for example if created as `torch.empty(5)[10:]`) that triggers
+    // applying non-zero offset to null pointer in UBSan
+    return get_data() + storage_offset_;
+  }
+
+ public:
+  /**
+   * Return a const void* data pointer to the actual data which this
+   * tensor refers to.
+   *
+   * It is invalid to call data() on a dtype-uninitialized tensor, even if the
+   * size is 0.
+   *
+   * WARNING: The data pointed to by this tensor may not be contiguous; do
+   * NOT assume that itemsize() * numel() is sufficient to compute the bytes
+   * that can be validly read from this tensor.
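+   *
+   * (Editorial example: for non-negative strides, a safe bound on readable
+   * bytes comes from strides rather than numel; a sketch, with `t` a live
+   * TensorImpl reference:
+   *
+   *   int64_t max_index = 0;
+   *   for (size_t d = 0; d < t.sizes().size(); ++d) {
+   *     if (t.sizes()[d] == 0) { max_index = -1; break; } // empty tensor
+   *     max_index += (t.sizes()[d] - 1) * t.strides()[d];
+   *   }
+   *   // readable bytes = (max_index + 1) * t.itemsize()
+   *
+   * assuming non-negative strides throughout.)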
+   */
+  inline const void* data() const {
+    return data_impl<const void>(
+        [this] { return static_cast<const char*>(storage_.data()); });
+  }
+
+  /**
+   * Return a void* data pointer to the actual data which this tensor refers
+   * to.
+   *
+   * It is invalid to call mutable_data() on a dtype-uninitialized
+   * tensor, even if the size is 0.
+   *
+   * WARNING: The data pointed to by this tensor may not be contiguous; do
+   * NOT assume that itemsize() * numel() is sufficient to compute the bytes
+   * that can be validly read from this tensor.
+   */
+  inline void* mutable_data() {
+    return data_impl<void>(
+        [this] { return static_cast<char*>(storage_.mutable_data()); });
+  }
+
+ private:
+  /// Shared implementation of data() and mutable_data().
+  ///
+  /// get_data must return a byte-addressed pointer, e.g. char*,
+  /// std::byte const*, etc.
+  template <typename Void, typename Func>
+  Void* data_impl(const Func& get_data) const {
+    if (C10_UNLIKELY(!has_storage())) {
+      throw_data_ptr_access_error();
+    }
+    TORCH_CHECK(
+        dtype_initialized(),
+        "Cannot access data pointer of Tensor that doesn't have initialized dtype "
+        "(e.g., caffe2::Tensor x(CPU), prior to calling mutable_data<T>() on x)");
+    auto* data = get_data();
+    static_assert(
+        sizeof(*data) == 1, "get_data must return a byte-addressed pointer.");
+    // Computing an offset into an empty tensor would be UB, since an empty
+    // tensor's storage will be nullptr, and adding a nonzero offset to nullptr
+    // is UB. So we skip the offset computation in this case.
+    if (is_empty()) {
+      return nullptr;
+    }
+    return data + data_type_.itemsize() * storage_offset_;
+  }
+
+ public:
+  /**
+   * Returns the TypeMeta of a tensor, which describes what data type
+   * it is (e.g., int, float, ...)
+   */
+  const caffe2::TypeMeta dtype() const {
+    return data_type_;
+  }
+
+  /**
+   * Return the size of a single element of this tensor in bytes.
+   */
+  size_t itemsize() const {
+    TORCH_CHECK(
+        dtype_initialized(),
+        "Cannot report itemsize of Tensor that doesn't have initialized dtype "
+        "(e.g., caffe2::Tensor x(CPU), prior to calling mutable_data<T>() on x)");
+    return data_type_.itemsize();
+  }
+
+  void set_backend_meta(intrusive_ptr<c10::BackendMeta> backend_meta) {
+    get_extra_meta().backend_meta_ = std::move(backend_meta);
+  }
+
+  c10::BackendMeta* get_backend_meta() {
+    if (!extra_meta_) {
+      return nullptr;
+    }
+    return extra_meta_->backend_meta_.get();
+  }
+
+  intrusive_ptr<c10::BackendMeta> get_backend_meta_intrusive_ptr() const {
+    if (!extra_meta_) {
+      return nullptr;
+    }
+    return extra_meta_->backend_meta_;
+  }
+
+  void release_storage_and_set_meta_custom_data_ptr_error_msg_(
+      std::optional<std::string> s) {
+    storage_ = {};
+    set_storage_access_should_throw();
+    get_extra_meta().custom_data_ptr_error_msg_ = s;
+    get_extra_meta().custom_storage_error_msg_ = std::move(s);
+  }
+
+ protected:
+  /**
+   * Returns the human-readable name of the actual type of this object (e.g.,
+   * TensorImpl, BatchedTensorImpl, etc.). Used for error messages.
+   */
+  virtual const char* tensorimpl_type_name() const {
+    return "TensorImpl";
+  }
+
+ private:
+  [[noreturn]] void throw_storage_access_error() const;
+  [[noreturn]] void throw_data_ptr_access_error() const;
+
+  ExtraMeta& get_extra_meta() {
+    if (!extra_meta_) {
+      extra_meta_ = std::make_unique<ExtraMeta>();
+    }
+    return *extra_meta_;
+  }
+
+  c10::SymbolicShapeMeta& symbolic_shape_meta() {
+    TORCH_INTERNAL_ASSERT(extra_meta_ && extra_meta_->symbolic_shape_meta_);
+    return *extra_meta_->symbolic_shape_meta_;
+  }
+
+  const c10::SymbolicShapeMeta& symbolic_shape_meta() const {
+    TORCH_INTERNAL_ASSERT(extra_meta_ && extra_meta_->symbolic_shape_meta_);
+    return *extra_meta_->symbolic_shape_meta_;
+  }
+
+ public:
+  /**
+   * True if a tensor has no elements (e.g., numel() == 0).
+   */
+  inline bool is_empty() const {
+    return numel() == 0;
+  }
+
+  // if we are going to use sym sizes, we should be setting sym strides at the
+  // same time, otherwise it's very easy to misuse this API
+  void set_sizes_and_strides(
+      c10::SymIntArrayRef sizes,
+      c10::SymIntArrayRef strides,
+      std::optional<c10::SymInt> storage_offset = std::nullopt);
+  // This is renamed to avoid breaking overload BC
+  void generic_set_sizes_contiguous(c10::SymIntArrayRef sizes);
+  void generic_set_sizes_contiguous(c10::IntArrayRef sizes) {
+    set_sizes_contiguous(sizes);
+  }
+
+  /**
+   * Change the size at some dimension. This DOES NOT update strides;
+   * thus, most changes to size will not preserve contiguity. You probably
+   * also want to call set_stride() when you call this.
+   *
+   * TODO: This should be jettisoned in favor of `set_sizes_and_strides`,
+   * which is harder to misuse.
+   */
+  virtual void set_size(int64_t dim, int64_t new_size) {
+    TORCH_CHECK(
+        allow_tensor_metadata_change(),
+        "set_size ",
+        err_msg_tensor_metadata_change_not_allowed);
+    TORCH_CHECK(
+        !matches_policy(SizesStridesPolicy::CustomSizes),
+        "set_size() called on tensor with dynamic shapes or customized size behavior")
+    sizes_and_strides_.size_at(dim) = new_size;
+    refresh_numel();
+    refresh_contiguous();
+  }
+
+  /**
+   * Change the stride at some dimension.
+   *
+   * TODO: This should be jettisoned in favor of `set_sizes_and_strides`,
+   * which is harder to misuse.
+   */
+  virtual void set_stride(int64_t dim, int64_t new_stride) {
+    TORCH_CHECK(
+        allow_tensor_metadata_change(),
+        "set_stride ",
+        err_msg_tensor_metadata_change_not_allowed);
+    TORCH_CHECK(
+        !has_symbolic_sizes_strides_,
+        "set_stride() called on tensor with symbolic shape")
+    sizes_and_strides_.stride_at_unchecked(dim) = new_stride;
+    refresh_contiguous();
+  }
+
+  /**
+   * Set the offset into the storage of this tensor.
+   *
+   * WARNING: This does NOT check if the tensor is in bounds for the new
+   * location at the storage; the caller is responsible for checking this
+   * (and resizing if necessary.)
+   */
+  virtual void set_storage_offset(int64_t storage_offset) {
+    TORCH_CHECK(
+        allow_tensor_metadata_change(),
+        "set_storage_offset ",
+        err_msg_tensor_metadata_change_not_allowed);
+    // TODO: this should probably consult policy
+    TORCH_CHECK(
+        !has_symbolic_sizes_strides_,
+        "set_storage_offset() called on tensor with symbolic shape")
+    storage_offset_ = storage_offset;
+  }
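+
+  // (Editorial sketch: set_size()/set_stride() mutate one dimension at a
+  // time and deliberately leave the rest alone, so a manual restride of a
+  // hypothetical `impl` looks like
+  //
+  //   impl->set_size(0, 8);
+  //   impl->set_stride(0, 1);      // caller keeps the pair consistent
+  //   impl->set_storage_offset(0); // and stays in bounds of the storage
+  //
+  // which is why the TODOs above steer new code to set_sizes_and_strides().)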
+
+  /**
+   * Like set_sizes_and_strides but assumes contiguous strides.
+   *
+   * WARNING: This function does not check if the requested
+   * sizes/strides are in bounds for the storage that is allocated;
+   * this is the responsibility of the caller
+   */
+  void set_sizes_contiguous(IntArrayRef new_size) {
+    TORCH_CHECK(
+        allow_tensor_metadata_change(),
+        "set_sizes_contiguous ",
+        err_msg_tensor_metadata_change_not_allowed);
+    TORCH_CHECK(
+        !matches_policy(SizesStridesPolicy::CustomStrides),
+        "tried to directly modify sizes for customized tensor");
+    sizes_and_strides_.set_sizes(new_size);
+
+    refresh_numel();
+    empty_tensor_restride(
+        MemoryFormat::Contiguous); // calls refresh_contiguous()
+  }
+
+  C10_ALWAYS_INLINE const impl::SizesAndStrides& sizes_and_strides() {
+    return sizes_and_strides_;
+  }
+
+  /**
+   * Set the sizes and strides of a tensor.
+   *
+   * WARNING: This function does not check if the requested
+   * sizes/strides are in bounds for the storage that is allocated;
+   * this is the responsibility of the caller
+   */
+  void set_sizes_and_strides(
+      IntArrayRef new_size,
+      IntArrayRef new_stride,
+      std::optional<int64_t> storage_offset = std::nullopt) {
+    TORCH_CHECK(
+        allow_tensor_metadata_change(),
+        "set_sizes_and_strides ",
+        err_msg_tensor_metadata_change_not_allowed);
+    TORCH_CHECK(
+        !has_symbolic_sizes_strides_,
+        "set_sizes_and_strides() called on tensor with symbolic shape")
+    TORCH_CHECK(
+        new_size.size() == new_stride.size(),
+        "dimensionality of sizes (",
+        new_size.size(),
+        ") must match dimensionality of strides (",
+        new_stride.size(),
+        ")");
+    const auto new_dim = new_size.size();
+    bool overflowed = false;
+    sizes_and_strides_.set_sizes(new_size);
+
+    if (new_dim > 0) {
+      for (size_t dim = new_dim - 1;; dim--) {
+        if (new_stride[dim] >= 0) {
+          sizes_and_strides_.stride_at_unchecked(dim) = new_stride[dim];
+        } else {
+          // XXX: This behavior is surprising and may need to be removed to
+          // support negative strides. Some pytorch functions rely on it:
+          // for example, torch.cat (run TestTorch.test_cat_empty).
+          if (dim == new_dim - 1) {
+            sizes_and_strides_.stride_at_unchecked(dim) = 1;
+          } else {
+            // Keep stride monotonically increasing to match NumPy.
+            overflowed |= c10::mul_overflows(
+                sizes_and_strides_.stride_at_unchecked(dim + 1),
+                std::max<int64_t>(
+                    sizes_and_strides_.size_at_unchecked(dim + 1), 1),
+                std::addressof(sizes_and_strides_.stride_at_unchecked(dim)));
+          }
+        }
+        if (dim == 0)
+          break;
+      }
+      TORCH_CHECK(!overflowed, "Stride calculation overflowed");
+    }
+
+    refresh_numel();
+    refresh_contiguous();
+
+    if (storage_offset.has_value()) {
+      storage_offset_ = *storage_offset;
+    }
+  }
+
+  /**
+   * Set whether a tensor allows changes to its metadata (e.g. sizes / strides
+   * / storage / storage_offset). See NOTE [ Metadata Change for a Detached
+   * Tensor ] for details.
+   */
+  void set_allow_tensor_metadata_change(bool value [[maybe_unused]]) {
+    // TODO: at some point, we should kill this field completely.
+    allow_tensor_metadata_change_ = true;
+  }
+
+  /**
+   * True if a tensor allows changes to its metadata (e.g. sizes / strides /
+   * storage / storage_offset). See NOTE [ Metadata Change for a Detached
+   * Tensor ] for details.
+   */
+  bool allow_tensor_metadata_change() const {
+    return allow_tensor_metadata_change_;
+  }
+
+  /**
+   * Set the pointer to autograd metadata.
+   */
+  void set_autograd_meta(
+      std::unique_ptr<c10::AutogradMetaInterface> autograd_meta);
+
+  /**
+   * Return the pointer to autograd metadata. May return nullptr if the
+   * tensor does not track gradients.
+   */
+  c10::AutogradMetaInterface* autograd_meta() const;
+
+  /**
+   * Set the pointer to named tensor metadata.
+   */
+  void set_named_tensor_meta(
+      std::unique_ptr<c10::NamedTensorMetaInterface> named_tensor_meta) {
+    TORCH_WARN_ONCE(
+        "Named tensors and all their associated APIs are an experimental feature ",
+        "and subject to change. Please do not use them for anything important ",
+        "until they are released as stable.");
+#ifdef DEBUG
+    if (named_tensor_meta) {
+      TORCH_INTERNAL_ASSERT(named_tensor_meta->slow_dim() == dim());
+    }
+#endif
+    if (named_tensor_meta) {
+      get_extra_meta().named_tensor_meta_ = std::move(named_tensor_meta);
+      key_set_ = key_set_.add(DispatchKey::Named);
+    } else {
+      if (extra_meta_) {
+        extra_meta_->named_tensor_meta_ = nullptr;
+      }
+      key_set_ = key_set_.remove(DispatchKey::Named);
+    }
+  }
+
+  void set_python_dispatch(bool k) {
+    if (k) {
+      key_set_ = key_set_.add(c10::python_ks);
+    } else {
+      key_set_ = key_set_ - c10::python_ks;
+    }
+  }
+
+  bool is_python_dispatch() const {
+    return key_set_.has_all(c10::python_ks);
+  }
+
+  /**
+   * Return the pointer to named tensor metadata.
+   */
+  const c10::NamedTensorMetaInterface* named_tensor_meta() const {
+    if (!extra_meta_) {
+      return nullptr;
+    }
+    return extra_meta_->named_tensor_meta_.get();
+  }
+
+  c10::NamedTensorMetaInterface* named_tensor_meta() {
+    if (!extra_meta_) {
+      return nullptr;
+    }
+    return extra_meta_->named_tensor_meta_.get();
+  }
+
+  bool has_named_tensor_meta() const {
+    if (!extra_meta_) {
+      return false;
+    }
+    return extra_meta_->named_tensor_meta_ != nullptr;
+  }
+
+  // NOTE [ TensorImpl Shallow-Copying ]
+  //
+  // TensorImpl shallow-copying is used when we want to have two Variables
+  // share the same tensor metadata (e.g. sizes / strides / storage pointer /
+  // storage_offset), but each with a different autograd history. Example call
+  // sites:
+  //
+  // 1. `var_detached = var.detach()` uses `shallow_copy_and_detach()` to
+  // create `var_detached` that shares the same tensor metadata with `var`,
+  // but with a completely new autograd history.
+  // 2. `var.set_data(tensor)` uses `shallow_copy_from()` to copy tensor
+  // metadata from `tensor` into `var`, while keeping `var`'s original
+  // AutogradMeta.
+  //
+  // Functions that shallow-copy a TensorImpl (such as
+  // `shallow_copy_and_detach()` / `shallow_copy_from()` /
+  // `copy_tensor_metadata()`) copy the tensor metadata fields (e.g. sizes /
+  // strides / storage pointer / storage_offset) by value. However, the
+  // following fields are not copied:
+  //
+  // 1. the AutogradMeta pointer, because it is unique for each Variable.
+  // 2. the version counter, because the destination TensorImpl's version
+  // counter is either set to the passed-in `version_counter` (in
+  // `shallow_copy_and_detach()` and `copy_tensor_metadata()`), or it is kept
+  // intact (in `shallow_copy_from()`). See NOTE [ Version Counter Sharing ]
+  // for details.
+  //
+  // In `shallow_copy_and_detach()` and `copy_tensor_metadata()`, the
+  // passed-in `allow_tensor_metadata_change` determines whether the
+  // TensorImpl shallow-copy allows changes to its metadata (e.g. sizes /
+  // strides / storage / storage_offset). See NOTE [ Metadata Change for a
+  // Detached Tensor ] for details.
+  //
+  // In `shallow_copy_from()`, we don't check the destination TensorImpl's
+  // `allow_tensor_metadata_change_`, because `shallow_copy_from()` is used
+  // for implementing functions such as `var.set_data(tensor)`, which changes
+  // `var`'s tensor metadata and expects its `allow_tensor_metadata_change_`
+  // to be ignored.
+
+  /**
+   * One TensorImpl can be copied to another TensorImpl if they have the same
+   * DispatchKeySet. The only two special cases (for legacy reasons) are:
+   * CPU is compatible with CUDA and SparseCPU is
+   * compatible with SparseCUDA.
+   */
+  inline bool has_compatible_shallow_copy_type(DispatchKeySet from) {
+    auto is_dense = [](DispatchKeySet ts) {
+      constexpr auto dense_backends = DispatchKeySet(
+          {BackendComponent::CPUBit,
+           BackendComponent::CUDABit,
+           BackendComponent::MPSBit,
+           BackendComponent::HIPBit,
+           BackendComponent::XPUBit,
+           BackendComponent::HPUBit,
+           BackendComponent::MTIABit});
+      constexpr auto dense_k = DispatchKeySet(DispatchKey::Dense);
+      return ts.has_any(dense_k) && ts.has_any(dense_backends);
+    };
+    auto is_sparse = [](DispatchKeySet ts) {
+      constexpr auto sparse_backends = DispatchKeySet(
+          {BackendComponent::CPUBit,
+           BackendComponent::CUDABit,
+           BackendComponent::HIPBit,
+           BackendComponent::XPUBit});
+      constexpr auto sparse_k = DispatchKeySet(DispatchKey::Sparse);
+      return ts.has_any(sparse_k) && ts.has_any(sparse_backends);
+    };
+    auto is_sparse_compressed = [](DispatchKeySet ts) {
+      constexpr auto sparse_compressed_k =
+          DispatchKeySet(DispatchKey::SparseCsr);
+      return ts.has_any(sparse_compressed_k);
+    };
+    return (key_set_ == from) || (is_dense(key_set_) && is_dense(from)) ||
+        (is_sparse(key_set_) && is_sparse(from)) ||
+        (is_sparse_compressed(key_set_) && is_sparse_compressed(from));
+  }
+
+ private:
+  template <typename VariableVersion>
+  c10::intrusive_ptr<TensorImpl> shallow_copy_and_detach_core(
+      VariableVersion&& version_counter,
+      bool allow_tensor_metadata_change) const;
+
+ public:
+  /**
+   * Return a TensorImpl that is a shallow-copy of this TensorImpl.
+   *
+   * For usage of `version_counter` and `allow_tensor_metadata_change`,
+   * see NOTE [ TensorImpl Shallow-Copying ].
+   */
+  virtual c10::intrusive_ptr<TensorImpl> shallow_copy_and_detach(
+      const c10::VariableVersion& version_counter,
+      bool allow_tensor_metadata_change) const;
+
+  /**
+   * Return a TensorImpl that is a shallow-copy of this TensorImpl.
+   *
+   * For usage of `version_counter` and `allow_tensor_metadata_change`,
+   * see NOTE [ TensorImpl Shallow-Copying ].
+   */
+  virtual c10::intrusive_ptr<TensorImpl> shallow_copy_and_detach(
+      c10::VariableVersion&& version_counter,
+      bool allow_tensor_metadata_change) const;
+
+  /**
+   * Shallow-copies data from another TensorImpl into this TensorImpl.
+   *
+   * For why this function doesn't check this TensorImpl's
+   * `allow_tensor_metadata_change_`, see NOTE [ TensorImpl Shallow-Copying ].
+   */
+  virtual void shallow_copy_from(const c10::intrusive_ptr<TensorImpl>& impl) {
+    copy_tensor_metadata(
+        /*src_impl=*/impl.get(),
+        /*dest_impl=*/this,
+        /*version_counter=*/version_counter(),
+        /*allow_tensor_metadata_change=*/allow_tensor_metadata_change());
+  }
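+
+  // (Editorial sketch of the two paths described in the NOTE above:
+  //
+  //   // detach(): same metadata, fresh autograd history
+  //   auto detached = impl->shallow_copy_and_detach(
+  //       c10::VariableVersion(/*version=*/0),
+  //       /*allow_tensor_metadata_change=*/false);
+  //
+  //   // set_data()-style: copy metadata in, keep this impl's counter
+  //   impl->shallow_copy_from(other);
+  //
+  // `impl` and `other` are hypothetical intrusive_ptr<TensorImpl> values,
+  // and the VariableVersion constructor call is illustrative.)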
+
+  // Inference tensor doesn't have version counter,
+  // set_version_counter is no-op for them.
+  void set_version_counter(const c10::VariableVersion& version_counter) {
+    TORCH_CHECK(
+        !(is_inference() && version_counter.enabled()),
+        "Cannot set version_counter for inference tensor");
+    version_counter_ = version_counter;
+  }
+
+  void set_version_counter(c10::VariableVersion&& version_counter) {
+    TORCH_CHECK(
+        !(is_inference() && version_counter.enabled()),
+        "Cannot set version_counter for inference tensor");
+    version_counter_ = std::move(version_counter);
+  }
+
+  const c10::VariableVersion& version_counter() const noexcept {
+    return version_counter_;
+  }
+
+  void bump_version() {
+    version_counter_.bump();
+  }
+
+  impl::PyObjectSlot* pyobj_slot() {
+    return &pyobj_slot_;
+  }
+
+  const impl::PyObjectSlot* pyobj_slot() const {
+    return &pyobj_slot_;
+  }
+
+ private:
+  // See NOTE [std::optional operator usage in CUDA]
+  // We probably don't want to expose this publicly until
+  // the note is addressed.
+  std::optional<c10::Device> device_opt() const {
+    return device_opt_;
+  }
+
+ public:
+  /**
+   * The device type of a Tensor, e.g., DeviceType::CPU or DeviceType::CUDA.
+   */
+  DeviceType device_type() const {
+    // TODO: A useful internal assert would be to show that device_opt_ is null
+    // only if you are an undefined tensor
+    TORCH_CHECK(
+        device_opt_.has_value(),
+        "device_type cannot be run on undefined Tensor");
+    // See NOTE [std::optional operator usage in CUDA]
+    return (*device_opt_).type();
+  }
+
+  /**
+   * @brief Extends the outer-most dimension of this tensor by num elements,
+   * preserving the existing data.
+   *
+   * The underlying data may be reallocated in order to accommodate the new
+   * elements, in which case this tensor's capacity is grown at a factor of
+   * growthPct. This ensures that Extend runs on an amortized O(1) time
+   * complexity.
+   *
+   * This op is auto-asynchronous if the underlying device (CUDA) supports it.
+   */
+  void Extend(int64_t num, float growthPct);
+
+  /**
+   * @brief Reserve space for the underlying tensor.
+   *
+   * This must be called after Resize(), since we only specify the first
+   * dimension. This does not copy over the old data to the newly allocated
+   * space.
+   */
+  void ReserveSpace(int64_t outer_dim);
+
+  /**
+   * @brief Resizes a tensor.
+   *
+   * Resize takes in a vector of ints specifying the dimensions of the tensor.
+   * You can pass in an empty vector to specify that it is a scalar (i.e.
+   * containing one single item).
+   *
+   * The underlying storage may be deleted after calling Resize: if the new
+   * shape leads to a different number of items in the tensor, the old memory
+   * is deleted and new memory will be allocated next time you call
+   * mutable_data(). However, if the shape is different but the total number of
+   * items is the same, the underlying storage is kept.
+   *
+   * This method respects caffe2_keep_on_shrink. Consult the internal logic
+   * of this method to see exactly under what circumstances this flag matters.
+   */
+  template <typename... Ts>
+  void Resize(Ts... dim_source) {
+    bool size_changed = SetDims(dim_source...);
+    if (size_changed) {
+      HandleResize();
+    }
+  }
+
+  template <typename T>
+  void Resize(const std::vector<T>& dim_source) {
+    Resize(ArrayRef<T>(dim_source));
+  }
+
+  /**
+   * Resizes the tensor without touching underlying storage.
+   * This requires the total size of the tensor to remain constant.
+   */
+  void Reshape(const std::vector<int64_t>& dims);
+
+  /**
+   * Release whatever memory the tensor was holding but keep size and type
+   * information. Subsequent call to mutable_data will trigger new memory
+   * allocation.
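+   *
+   * (Editorial example of the legacy lifecycle documented above, written in
+   * the idiomatic Caffe2 API used elsewhere in this header:
+   *
+   *   Tensor x(CPU);                       // dtype-UNINITIALIZED
+   *   x.Resize(8);                         // storage-UNINITIALIZED
+   *   float* p = x.mutable_data<float>();  // allocates, initializes dtype
+   *   x.FreeMemory();                      // storage freed, dtype kept
+   *
+   * -- a sketch of the state transitions, not a new API.)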
+   */
+  void FreeMemory();
+
+  /**
+   * @brief Shares the data with another tensor.
+   *
+   * To share data between two tensors, the sizes of the two tensors must be
+   * equal already. The reason we do not implicitly do a Resize to make the two
+   * tensors have the same shape is that we want to allow tensors of different
+   * shapes but the same number of items to still be able to share data. This
+   * allows one to e.g. have a n-dimensional Tensor and a flattened version
+   * sharing the same underlying storage.
+   *
+   * The source tensor should already have its data allocated.
+   */
+  // To be deprecated
+  void ShareData(const TensorImpl& src);
+
+  void ShareExternalPointer(
+      DataPtr&& data_ptr,
+      const caffe2::TypeMeta data_type,
+      size_t size_bytes);
+
+  /**
+   * Returns a mutable raw pointer of the underlying storage. Since we will
+   * need to know the type of the data for allocation, a TypeMeta object is
+   * passed in to specify the necessary information. This is conceptually
+   * equivalent to calling mutable_data<T>(), where the TypeMeta parameter
+   * meta is derived from the type T. This function differs from
+   * mutable_data<T>() in the sense that the type T can be specified during
+   * runtime via the TypeMeta object.
+   *
+   * If the existing data does not match the desired type, it will be deleted
+   * and a new storage will be created.
+   */
+  inline void* raw_mutable_data(const caffe2::TypeMeta& meta) {
+    // For 0-size tensors it's fine to return any pointer (including nullptr)
+    if (data_type_ == meta && storage_initialized()) {
+      return static_cast<void*>(
+          static_cast<char*>(storage_.mutable_data()) +
+          storage_offset_ * meta.itemsize());
+    } else {
+      bool had_special_dtor = data_type_.placementDelete() != nullptr;
+      storage_offset_ = 0;
+      data_type_ = meta;
+      // NB: device is not changed
+
+      // We can reuse the existing buffer if the current data does not have
+      // a special destructor and the new data doesn't have a special
+      // constructor.
+      if (numel_ == 0 ||
+          (meta.placementNew() == nullptr && !had_special_dtor &&
+           (storage_.nbytes() >= (numel_ * data_type_.itemsize())))) {
+        TORCH_INTERNAL_ASSERT(
+            storage_offset_ == 0); // because we just reallocated
+        return storage_.mutable_data();
+      }
+      Allocator* allocator = storage_.allocator();
+      // Storage might have nullptr allocator in rare cases, for example, if
+      // an external memory segment has been wrapped with Tensor and we don't
+      // know how to reallocate it. However, in order to preserve legacy C2
+      // behavior, we allow reallocating the memory using default allocator.
+      if (allocator == nullptr) {
+        allocator = GetAllocator(storage_.device_type());
+      }
+      if (meta.placementNew()) {
+        // For types that need placement new, we will call it, as well as
+        // making sure that when the data is freed, it calls the right
+        // destruction procedure.
+        auto size = numel_;
+        auto dtor = data_type_.placementDelete();
+        auto data_ptr = allocator->allocate(numel_ * data_type_.itemsize());
+        storage_.set_data_ptr_noswap(PlacementDeleteContext::makeDataPtr(
+            std::move(data_ptr), dtor, size, storage_.device()));
+        data_type_.placementNew()(storage_.mutable_data(), numel_);
+      } else {
+        // For fundamental types, plain allocation is enough: no per-element
+        // construction or destruction is needed.
+        storage_.set_data_ptr_noswap(
+            allocator->allocate(numel_ * data_type_.itemsize()));
+      }
+      storage_.set_nbytes(numel_ * data_type_.itemsize());
+      TORCH_INTERNAL_ASSERT(
+          storage_offset_ == 0); // because we just reallocated
+      device_opt_ = storage_.device();
+      return storage_.mutable_data();
+    }
+  }
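+
+  // (Editorial usage sketch for raw_mutable_data(): the runtime-typed
+  // analogue of mutable_data<T>(), e.g.
+  //
+  //   auto meta = caffe2::TypeMeta::Make<float>();
+  //   float* p = static_cast<float*>(impl->raw_mutable_data(meta));
+  //
+  // where `impl` is a hypothetical TensorImpl* whose sizes are already set;
+  // if the previously-set dtype differs, the buffer is reallocated as
+  // described above.)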
+
+  /**
+   * Returns a typed pointer of the underlying storage.
+   *
+   * For fundamental types, we reuse possible existing storage if there
+   * is sufficient capacity.
+   */
+  template <typename T>
+  inline T* mutable_data() {
+    if (storage_initialized() && data_type_.Match<T>()) {
+      return static_cast<T*>(storage_.mutable_data()) + storage_offset_;
+    }
+    // Check it here statically - otherwise TypeMeta would throw the runtime
+    // error in attempt to invoke TypeMeta::ctor()
+    static_assert(
+        std::is_default_constructible_v<T>,
+        "Tensor can't hold non-default-constructible types");
+    return static_cast<T*>(raw_mutable_data(caffe2::TypeMeta::Make<T>()));
+  }
+
+  /**
+   * True if a tensor is storage initialized. A tensor may become
+   * storage UNINITIALIZED after a Resize() or FreeMemory()
+   */
+  bool storage_initialized() const {
+    TORCH_CHECK(
+        has_storage(),
+        "cannot call storage_initialized on tensor that does not have storage");
+    return storage_.data() || numel_ == 0;
+  }
+
+  /**
+   * True if a tensor is dtype initialized. A tensor allocated with
+   * Caffe2-style constructors is dtype uninitialized until the
+   * first time mutable_data<T>() is called.
+   */
+  bool dtype_initialized() const noexcept {
+    return data_type_ != caffe2::TypeMeta();
+  }
+
+  void set_storage_keep_dtype(at::Storage storage) {
+    TORCH_CHECK(
+        allow_tensor_metadata_change(),
+        "set_storage ",
+        err_msg_tensor_metadata_change_not_allowed);
+    storage_ = std::move(storage);
+    device_opt_ = storage_.device();
+  }
+
+  void set_storage_and_dtype(
+      at::Storage storage,
+      const caffe2::TypeMeta data_type) {
+    set_storage_keep_dtype(std::move(storage));
+    data_type_ = data_type;
+  }
+
+  void empty_tensor_restride_symint(MemoryFormat memory_format);
+
+  /**
+   * Set the strides of the tensor to match memory_format
+   *
+   * WARNING: This function doesn't rearrange data and assumes the tensor is
+   * memory-contiguous
+   */
+  void empty_tensor_restride(MemoryFormat memory_format) {
+    if (has_symbolic_sizes_strides_) {
+      empty_tensor_restride_symint(memory_format);
+      return;
+    }
+#ifdef DEBUG
+    TORCH_INTERNAL_ASSERT(
+        compute_numel() == numel_,
+        "If you are seeing this error, that means empty_tensor_restride was "
+        "called before setting correct numel");
+#endif
+    switch (memory_format) {
+      case MemoryFormat::Contiguous: {
+        // dim_ is a virtual call, don't repeat it
+        const auto dim_ = dim();
+        sizes_and_strides_.resize(dim_);
+        if (dim_ > 0) {
+          bool overflowed = false;
+          const auto last_idx = dim_ - 1;
+          sizes_and_strides_.stride_at_unchecked(last_idx) = 1;
+          for (auto i = last_idx - 1; i >= 0; --i) {
+            overflowed |= c10::mul_overflows(
+                sizes_and_strides_.stride_at_unchecked(i + 1),
+                std::max<int64_t>(
+                    sizes_and_strides_.size_at_unchecked(i + 1), 1),
+                std::addressof(sizes_and_strides_.stride_at_unchecked(i)));
+          }
+          TORCH_CHECK(!overflowed, "Stride calculation overflowed");
+        }
+        break;
+      }
+      case MemoryFormat::ChannelsLast: {
+        TORCH_CHECK(
+            dim() == 4, "required rank 4 tensor to use channels_last format");
+        set_sizes_and_strides(sizes(), get_channels_last_strides_2d(sizes()));
+        break;
+      }
+      case MemoryFormat::ChannelsLast3d: {
+        TORCH_CHECK(
+            dim() == 5,
+            "required rank 5 tensor to use channels_last_3d format");
+        set_sizes_and_strides(sizes(), get_channels_last_strides_3d(sizes()));
get_channels_last_strides_3d(sizes())); + break; + } + case MemoryFormat::Preserve: + TORCH_CHECK(false, "unsupported memory format ", memory_format); + // Cleaning warning messages, no need to break as TORCH_CHECK(false) + // terminates flow. + // break; + case MemoryFormat::NumOptions: + TORCH_INTERNAL_ASSERT(false, "invalid memory format ", memory_format); + } + // recompute contiguous flag, as currently NHWC/NCHW flags are not mutually + // exclusive see #24090 + refresh_contiguous(); + } + + bool is_strides_like(at::MemoryFormat memory_format) const { + if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomStrides))) { + return is_strides_like_custom(memory_format); + } + return is_strides_like_default(memory_format); + } + + bool is_strides_like_channels_last() const { + return is_strides_like(at::MemoryFormat::ChannelsLast); + } + + bool is_strides_like_channels_last_3d() const { + return is_strides_like(at::MemoryFormat::ChannelsLast3d); + } + + bool is_non_overlapping_and_dense() const { + if (C10_UNLIKELY(matches_policy(SizesStridesPolicy::CustomStrides))) { + return is_non_overlapping_and_dense_custom(); + } + return is_non_overlapping_and_dense_default(); + } + + // if this returns true, then it is guaranteed that this tensor has symbolic + // sizes/strides + bool has_symbolic_sizes_strides() const { + return has_symbolic_sizes_strides_; + } + + private: + void HandleResize(); + + // The Caffe2 Resize() method supports being called both as Resize({2,2}) as + // well as variadic with Resize(2, 2). These overloads provide all of the + // supported calling configurations, while being overloads (and not templates) + // so that implicit conversions still work. + // + // SetDims on ArrayRef is internally implemented as a template, so we can + // handle both ArrayRefs of different types (there are some uses of + // Resize in Caffe2 which pass in int, not int64_t.) + + template < + typename T, + typename = typename std::enable_if_t>> + bool SetDimsTemplate(ArrayRef src) { + TORCH_CHECK( + !has_symbolic_sizes_strides_, + "SetDims() called on tensor with symbolic shape") + + auto old_numel = numel_; + sizes_and_strides_.resize(src.size()); + int64_t new_numel = 1; + for (const auto i : c10::irange(src.size())) { + new_numel *= src[i]; + sizes_and_strides_.size_at_unchecked(i) = src[i]; + } + numel_ = new_numel; + empty_tensor_restride(MemoryFormat::Contiguous); + return numel_ != old_numel; + } + + bool SetDims(ArrayRef s) { + return SetDimsTemplate(s); + } + + bool SetDims(ArrayRef s) { + return SetDimsTemplate(s); + } + + bool SetDims(ArrayRef s) { + return SetDimsTemplate(s); + } + + bool SetDims() { + return SetDims(IntArrayRef{}); + } + + bool SetDims(const int64_t d0) { + return SetDims(IntArrayRef{d0}); + } + + bool SetDims(const int64_t d0, const int64_t d1) { + return SetDims(IntArrayRef{d0, d1}); + } + + bool SetDims(const int64_t d0, const int64_t d1, const int64_t d2) { + return SetDims(IntArrayRef{d0, d1, d2}); + } + + bool SetDims( + const int64_t d0, + const int64_t d1, + const int64_t d2, + const int64_t d3) { + return SetDims(IntArrayRef{d0, d1, d2, d3}); + } + + /** + * Compute the number of elements based on the sizes of a tensor. 
+ */ + // NB: This is ONLY called when sizes_and_strides_ is used directly; if + // we are virtualizing, then numel calls are virtualized as well, and this + // should never get called + int64_t compute_numel() const { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!has_symbolic_sizes_strides_); +#if C10_HAS_BUILTIN_OVERFLOW() && !defined(C10_MOBILE) + // Use overflow checks if supported by the compiler + return safe_compute_numel(); +#else + return c10::multiply_integers(sizes_and_strides_.sizes_arrayref()); +#endif + } + + /** + * Compute the number of elements based on the sizes of a + * tensor. Catches integer overflow that may occur when a tensor + * using a sparse layout has multiple dimensions with large sizes. + */ + int64_t safe_compute_numel() const { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!has_symbolic_sizes_strides_); + uint64_t n = 1; + bool overflows = + c10::safe_multiplies_u64(sizes_and_strides_.sizes_arrayref(), &n); + constexpr auto numel_max = std::min( + static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::max())); + + overflows |= (n > numel_max); + TORCH_CHECK(!overflows, "numel: integer multiplication overflow"); + return static_cast(n); + } + + /** + * Compute whether or not a tensor is contiguous based on the sizes and + * strides of a tensor. + */ + bool compute_contiguous() const; + + bool compute_channels_last_contiguous_2d() const; + + bool compute_channels_last_contiguous_3d() const; + + bool compute_strides_like_channels_last_2d() const; + + bool compute_strides_like_channels_last_3d() const; + + bool compute_non_overlapping_and_dense() const; + + protected: + /** + * Recompute the cached numel of a tensor. Call this if you modify + * sizes. + * + * For tensors with sparse layouts, use safe_refresh_numel() instead + * because it will catch integer overflow that may occur for tensors + * with sparse layouts and large dimensions. + * + * NB: We may uselessly recompute cached numel even in situations where + * it is completely never used (e.g., if CustomSizes for Python). However, + * we still must keep it up to date in case the Python overload + * returns None (in which case we will consult the field here). This also + * implies that sizes/strides will never be complete garbage; in the + * very worst case scenario, it will reflect a 1-dim zero size tensor. + */ + void refresh_numel() { + if (has_symbolic_sizes_strides_) { + symbolic_shape_meta().refresh_numel(); + } else { + numel_ = compute_numel(); + } + } + + /** + * Recompute the cached numel of a tensor. Call this if you modify + * sizes. Use only for tensors with sparse layouts because only + * sparse tensor are likely to have sizes that may lead to integer + * overflow when computing numel. 
+ */ + void safe_refresh_numel() { + if (has_symbolic_sizes_strides_) { + // NB: sym numel is done with symbolic integers, which handle overflow + // checking + symbolic_shape_meta().refresh_numel(); + } else { + numel_ = safe_compute_numel(); + } + } + + private: + void _set_is_contiguous(bool b) { + is_contiguous_ = b; + } + + void _set_is_channels_last_contiguous(bool b) { + is_channels_last_contiguous_ = b; + } + + void _set_is_channels_last_3d_contiguous(bool b) { + is_channels_last_3d_contiguous_ = b; + } + + void _set_is_channels_last(bool b) { + is_channels_last_ = b; + } + + void _set_is_channels_last_3d(bool b) { + is_channels_last_3d_ = b; + } + + void _set_is_non_overlapping_and_dense(bool b) { + is_non_overlapping_and_dense_ = b; + } + + // These are little wrappers over the real compute_ functions that + // can make use of other contiguity fields to short circuit. + + bool compute_is_non_overlapping_and_dense_dim4() { + return is_contiguous_ || is_channels_last_contiguous_ || + compute_non_overlapping_and_dense(); + } + + bool compute_channels_last_contiguous_3d_dim5() { + return !is_channels_last_contiguous_ && + compute_channels_last_contiguous_3d(); + } + + bool compute_channels_last_2d_dim5() { + return !is_channels_last_3d_contiguous_ && + compute_strides_like_channels_last_2d(); + } + + bool compute_channels_last_3d_dim5() { + return !is_channels_last_ && compute_strides_like_channels_last_3d(); + } + + bool compute_is_non_overlapping_and_dense_dim5() { + return is_contiguous_ || is_channels_last_contiguous_ || + is_channels_last_3d_contiguous_ || compute_non_overlapping_and_dense(); + } + + bool compute_is_non_overlapping_and_dense_anydim() { + return is_contiguous_ || compute_non_overlapping_and_dense(); + } + + void _refresh_contiguous() { + // Note: + // Dim 0, 1, 2 will never be a channels last 2d/3d format + // Dim 3+ is possibly be a channels last 2d format (Dim 4 only at this + // point) Dim 4+ is possibly be a channels last 3d format (Dim 5 only at + // this point) + switch (dim()) { + case 4: { + _set_is_contiguous(compute_contiguous()); + _set_is_channels_last_contiguous(compute_channels_last_contiguous_2d()); + _set_is_channels_last_3d_contiguous(false); + _set_is_channels_last(compute_strides_like_channels_last_2d()); + _set_is_channels_last_3d(false); + _set_is_non_overlapping_and_dense( + compute_is_non_overlapping_and_dense_dim4()); + break; + } + case 5: { + _set_is_contiguous(compute_contiguous()); + _set_is_channels_last_contiguous(compute_channels_last_contiguous_2d()); + _set_is_channels_last_3d_contiguous( + compute_channels_last_contiguous_3d_dim5()); + _set_is_channels_last(compute_channels_last_2d_dim5()); + _set_is_channels_last_3d(compute_channels_last_3d_dim5()); + _set_is_non_overlapping_and_dense( + compute_is_non_overlapping_and_dense_dim5()); + break; + } + default: + // is_channels_last_ and is_channels_last_3d_ are suggested + // memory_format. Being channels_last_contiguous doesn't necessarily + // mean the tensor is strided like channels_last: for strides on channel + // dimension could suggest desired memory_layout, but it doesn't affect + // memory storage + _set_is_contiguous(compute_contiguous()); + _set_is_channels_last_contiguous(false); + _set_is_channels_last_3d_contiguous(false); + _set_is_channels_last(false); + _set_is_channels_last_3d(false); + _set_is_non_overlapping_and_dense( + compute_is_non_overlapping_and_dense_anydim()); + break; + } + } + + protected: + /** + * Recompute the cached contiguity of a tensor. 
Call this if you modify sizes + * or strides. + */ + void refresh_contiguous() { + if (has_symbolic_sizes_strides_) { + symbolic_shape_meta().refresh_contiguous(); + } else { + _refresh_contiguous(); + } + } + + /** + * Copy the tensor metadata fields (e.g. sizes / strides / storage pointer / + * storage_offset) from one TensorImpl to another TensorImpl. + * + * For usage of `version_counter` and `allow_tensor_metadata_change`, see NOTE + * [ TensorImpl Shallow-Copying ]. + */ + static void copy_tensor_metadata( + const TensorImpl* src_impl, + TensorImpl* dest_impl, + const c10::VariableVersion& version_counter, + bool allow_tensor_metadata_change); + + /** + * Copy the tensor metadata fields (e.g. sizes / strides / storage pointer / + * storage_offset) from one TensorImpl to another TensorImpl. + * + * For usage of `version_counter` and `allow_tensor_metadata_change`, see NOTE + * [ TensorImpl Shallow-Copying ]. + */ + static void copy_tensor_metadata( + const TensorImpl* src_impl, + TensorImpl* dest_impl, + c10::VariableVersion&& version_counter, + bool allow_tensor_metadata_change); + + private: + static void copy_tensor_metadata_except_version_counter( + const TensorImpl* src_impl, + TensorImpl* dest_impl, + bool allow_tensor_metadata_change); + + protected: + // Error message to show when the user tries to change tensor metadata on + // Tensor created from .data or .detach(). + // + // See NOTE [ Metadata Change for a Detached Tensor ] for details. + static const char* const err_msg_tensor_metadata_change_not_allowed; + + static void copy_generic_tensor_metadata( + const TensorImpl* src_impl, + TensorImpl* dest_impl); + + public: + void set_storage_access_should_throw() { + storage_access_should_throw_ = true; + } + + public: + void set_custom_sizes_strides(SizesStridesPolicy policy) { + custom_sizes_strides_ = static_cast(policy); + refresh_sizes_strides_policy(); + } + + void set_python_custom_sizes_strides(SizesStridesPolicy policy) { + python_custom_sizes_strides_ = static_cast(policy); + refresh_sizes_strides_policy(); + } + + void set_custom_device(bool custom_device) { + custom_device_ = custom_device; + refresh_device_policy(); + } + + void set_custom_layout(bool custom_layout) { + custom_layout_ = custom_layout; + refresh_layout_policy(); + } + + void set_python_custom_device(bool custom_device) { + python_custom_device_ = custom_device; + refresh_device_policy(); + } + + void set_python_custom_layout(bool custom_layout) { + python_custom_layout_ = custom_layout; + refresh_layout_policy(); + } + + protected: + void refresh_sizes_strides_policy() { + if (has_symbolic_sizes_strides_) { + sizes_strides_policy_ = + static_cast(SizesStridesPolicy::CustomSizes); + } else { + sizes_strides_policy_ = + std::max(custom_sizes_strides_, python_custom_sizes_strides_); + } + } + + void refresh_device_policy() { + device_policy_ = custom_device_ || python_custom_device_; + } + + void refresh_layout_policy() { + layout_policy_ = custom_layout_ || python_custom_layout_; + } + + protected: + Storage storage_; + + private: + // This pointer points to an AutogradMeta struct that stores autograd-specific + // fields (such as grad_ / grad_fn_ / grad_accumulator_). This pointer always + // has unique ownership (meaning only one TensorImpl can own it at a time). + // + // autograd_meta_ can be nullptr, as an optimization. 
When this occurs, it is + // equivalent to having an autograd_meta_ pointing to a default constructed + // AutogradMeta; intuitively, tensors which don't require grad will have this + // field set to null. + // + // This means accessors on autograd_meta_ have to be careful to test if they + // got a nullptr, and handle default behavior appropriately in that case. + // + // Note that we don't enforce the invariant that if the AutogradMeta is + // default constructed, it is nullptr (to do this, we'd have to continuously + // check if an AutogradMeta became, by mutation, equal to the default + // constructed form. (This might be useful, but it seems rare enough that + // a requires_grad=True variable will turn back into the requires_grad=False + // version.) So there are three representable states: + // + // 1. autograd_meta_ == nullptr + // 2. autograd_meta_ is default constructed (semantically, same as (1)) + // 3. autograd_meta_ has nontrivial information content + // + std::unique_ptr autograd_meta_ = nullptr; + + protected: + std::unique_ptr extra_meta_ = nullptr; + + c10::VariableVersion version_counter_; + + impl::PyObjectSlot pyobj_slot_; + + c10::impl::SizesAndStrides sizes_and_strides_; + + int64_t storage_offset_ = 0; + // If sizes and strides are empty, the numel is 1!! However, most of the + // time, we will immediately set sizes to {0} and reset numel to 0. + // (Can't do that in the default initializers, because there's no way to + // spell "allocate a one-element array" for strides_). + int64_t numel_ = 1; + + // INVARIANT: When storage is non-null, this type meta must + // agree with the type meta in storage + caffe2::TypeMeta data_type_; + + // NOTE [std::optional operator usage in CUDA] + // Our optional definition doesn't compile in .cu file if `value()` or + // `operator->` are used. Instead, we always use `operator*`. + // See https://github.com/pytorch/pytorch/issues/18496 for more info. + // If this is too burdensome to maintain, we can just + // manually implement this with an additional bool. + + // INVARIANT: When storage is non-null, this Device must + // agree with the type meta in storage. + // + // INVARIANT: device_opt_ is only nullopt for undefined tensors + // (which do not have a device.) + std::optional device_opt_; + + // default member initializers for bit-fields only available with -std=c++2a + // or -std=gnu++2a + inline void init_bitfields() { + is_contiguous_ = true; + is_channels_last_ = false; + is_channels_last_contiguous_ = false; + is_channels_last_3d_ = false; + is_channels_last_3d_contiguous_ = false; + is_non_overlapping_and_dense_ = true; + is_wrapped_number_ = false; + allow_tensor_metadata_change_ = true; + reserved_ = false; + sizes_strides_policy_ = static_cast(SizesStridesPolicy::Default); + custom_sizes_strides_ = static_cast(SizesStridesPolicy::Default); + python_custom_sizes_strides_ = + static_cast(SizesStridesPolicy::Default); + python_custom_device_ = false; + python_custom_layout_ = false; + custom_device_ = false; + custom_layout_ = false; + device_policy_ = false; + layout_policy_ = false; + storage_access_should_throw_ = false; + has_symbolic_sizes_strides_ = false; + } + + // Tensor is contiguous + bool is_contiguous_ : 1; + + // Tensor is a subclass that does not permit storage access. 
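+  // When set, attempts to access the storage raise an error instead of
+  // returning it; subclasses without a backing storage use this.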
+ bool storage_access_should_throw_ : 1; + + // Tensor is stored in the channels last 2d memory format, when dimensions + // order is (N)CHW and C-strides < W-strides < H-strides (< N-strides) + // (If size of any dimension is equal to 1, this dimension strides value + // is not taken into account). + bool is_channels_last_ : 1; + + // Channels last contiguous tensor is channel last tensor which occupies + // contiguous memory block. + bool is_channels_last_contiguous_ : 1; + + // Tensor is stored in the channels last 3d memory format, when dimensions + // order is (N)CDHW and C-strides < W-strides < H-strides < D - strides (< + // N-strides) (If size of any dimension is equal to 1, this dimension strides + // value is not taken into account). + bool is_channels_last_3d_ : 1; + + // Channels last 3d contiguous tensor is channel last 3d tensor which occupies + // contiguous memory block. + bool is_channels_last_3d_contiguous_ : 1; + + // Dense tensor is the tensor that store values in a contiguous block of + // memory. Non-overlapping tensor is the tensor in which elements occupy + // individual non-repetitive memory. + bool is_non_overlapping_and_dense_ : 1; + + bool is_wrapped_number_ : 1; + + // NOTE [ Metadata Change for a Detached Tensor ] + // + // Normally, a user is allowed to change the tensor metadata + // (e.g. sizes / strides / storage / storage_offset) of a tensor. + // However, if the tensor is created by `t1_detached = t1.data` in Python + // or `t1_detached = t1.detach()` in Python/C++, those changes to the + // tensor metadata of `t1_detached` will not be propagated back to the + // original tensor `t1`. In order to make such changes explicitly illegal, + // we created the `allow_tensor_metadata_change_` flag, to prevent users + // from changing metadata of the detached tensor and expecting the original + // tensor to also be updated. + // + // NOTE: For a full list of tensor metadata fields, please see + // `copy_tensor_metadata()` in TensorImpl and its subclasses to find + // which fields are copied by value. + bool allow_tensor_metadata_change_ : 1; + + // we decide to keep reserved_ and it will + // live in Tensor after the split + // The logic is that if Extend() or ReserveSpace() were ever called, + // then subsequent Resize()s will not free up Storage. + bool reserved_ : 1; + + // Call _custom() virtual methods for + // strides()/is_contiguous()/sizes()/dim()/numel() + // This is a combination of sizes_strides_custom_dispatch_ + // and has_symbolic_sizes_strides_ + uint8_t sizes_strides_policy_ : 2; + + // Whether or not sizes_and_strides_ contains a symbolic value. + bool has_symbolic_sizes_strides_ : 1; + + // Call _custom() virtual method for + // strides()/is_contiguous()/sizes()/dim()/numel() + uint8_t custom_sizes_strides_ : 2; + + // Combo of custom_ and python_custom_ + bool device_policy_ : 1; + bool layout_policy_ : 1; + + // Call _custom() virtual method for device() + bool custom_device_ : 1; + + // Call _custom() virtual method for layout() + bool custom_layout_ : 1; + + // Call into Python for + // strides()/is_contiguous()/sizes()/dim()/numel() + uint8_t python_custom_sizes_strides_ : 2; + + // Call into Python for device() + bool python_custom_device_ : 1; + + // Call into Python for layout() + bool python_custom_layout_ : 1; + + // The set of DispatchKeys which describe this tensor. NB: this + // does NOT include Autograd (historically, it did, but + // not anymore!) 
+ // + // INVARIANT: extra_meta_->named_tensor_meta_ != nullptr <==> + // key_set_.has(DispatchKey::Named) + DispatchKeySet key_set_; + + private: + // C10_TensorImpl_Size_Check_Dummy_Class needs to be friends with + // TensorImpl so it can inspect the size of private fields + template < + size_t cplusplus, + size_t clang_ver_major, + size_t gcc_ver, + size_t gcc_ver_minor, + size_t nvcc, + size_t cuda_version, + size_t cuda_version_major, + size_t ptr_size> + friend class C10_TensorImpl_Size_Check_Dummy_Class; +}; + +// Note [TensorImpl size constraints] +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +// Changed the size of TensorImpl? If the size went down, good for +// you! Adjust the documentation below and the expected size. +// Did it go up? Read on... +// +// Struct size matters. In some production systems at Facebook, we have +// 400M live tensors during a training run. Do the math: every 64-bit +// word you add to Tensor is an extra 3.2 gigabytes in RAM. +// +// If you are a Facebook employee, you can check if the run in question +// has tipped you over the point using the command here: +// https://fburl.com/q5enpv98 +// +// For reference, we OOMed at 160 bytes (20 words) per TensorImpl. +// This is not counting overhead from strides out-of-line allocation and +// StorageImpl space and this is from before we inlined sizes and strides +// directly into TensorImpl as SmallVectors. +// +// Our memory usage on 32-bit systems is suboptimal, but we're not checking +// for it at the moment (to help avoid rage inducing cycles when the +// 32-bit number is wrong). +// +// Current breakdown: +// +// vtable pointer +// strong refcount TODO: pack these into one word +// weak refcount +// storage pointer +// autograd metadata pointer +// named tensor metadata pointer +// version counter pointer +// PyObjectSlot +// SizesAndStrides size/pointer +// SizesAndStrides sizes (pre-allocated 0) +// SizesAndStrides sizes (pre-allocated 1) +// SizesAndStrides sizes (pre-allocated 2) +// SizesAndStrides sizes (pre-allocated 3) +// SizesAndStrides sizes (pre-allocated 4) +// SizesAndStrides strides (pre-allocated 0) +// SizesAndStrides strides (pre-allocated 1) +// SizesAndStrides strides (pre-allocated 2) +// SizesAndStrides strides (pre-allocated 3) +// SizesAndStrides strides (pre-allocated 4) +// storage offset +// numel +// data type, device, is_contiguous, storage_access_should_throw_, bitfields +// DispatchKeySet +// + +// Various preprocessor macros we use to check that the +// TensorImpl size hasn't changed unexpectedly. We undef +// these later. +#ifndef __NVCC__ +#define C10_NVCC 0 +#else +#define C10_NVCC __NVCC__ +#endif + +#ifndef __CUDA_VER_MAJOR__ +#define C10_CUDA_VERSION_MAJOR 0 +#else +#define C10_CUDA_VERSION_MAJOR __CUDA_VER_MAJOR__ +#endif + +#ifndef CUDA_VERSION +#define C10_CUDA_VERSION 0 +#else +#define C10_CUDA_VERSION CUDA_VERSION +#endif + +#ifndef __clang_major__ +#define C10_CLANG_MAJOR_VERSION 0 +#else +#define C10_CLANG_MAJOR_VERSION __clang_major__ +#endif + +#ifndef __GNUC__ +#define C10_GCC_VERSION 0 +#else +#define C10_GCC_VERSION __GNUC__ +#endif + +#ifndef __GNUC_MINOR__ +#define C10_GCC_VERSION_MINOR 0 +#else +#define C10_GCC_VERSION_MINOR __GNUC_MINOR__ +#endif + +// We use a templatized class to both contain the logic of checking the sizes +// as well as to provide compile-time information that might be useful in +// figuring out why sizes may have changed. 
+// All the compile time information is given by the template fields that are +// always printed by the compiler when the static_assert fails. +template < + size_t cplusplus = __cplusplus, + size_t clang_ver_major = C10_CLANG_MAJOR_VERSION, + size_t gcc_ver = C10_GCC_VERSION, + size_t gcc_ver_minor = C10_GCC_VERSION_MINOR, + size_t nvcc = C10_NVCC, + size_t cuda_version = C10_CUDA_VERSION, + size_t cuda_version_major = C10_CUDA_VERSION_MAJOR, + size_t ptr_size = sizeof(void*)> +class C10_TensorImpl_Size_Check_Dummy_Class : private TensorImpl { + // Names of (non-bitfield) fields in TensorImpl; used to provide + // compile-time info about fields whose size changes unexpectedly. + enum class FieldNameEnum { + storage_, + autograd_meta_, + extra_meta_, + version_counter_, + pyobj_slot_, + sizes_and_strides_, + storage_offset_, + numel_, + data_type_, + device_opt_, + key_set_, + TOTAL_SIZE + }; + + // Provides compile-time equality check that reveals what numbers + // were used and on which quantity + template + constexpr static bool are_equal() { + static_assert( + Actual == Expected, + "Actual and Expected sizes of a field did not match!"); + return true; + } + + // Provides compile-time <= check that reveals what numbers + // were used and on which quantity + template + constexpr static bool is_le() { + static_assert( + Actual <= Expected, + "Actual and Expected sizes of a field did not match!"); + return true; + } + + public: + // Compile-time check that TensorImpl field sizes are as expected + // + // Observed total sizes and associated versions + // If you find a flag that predicts when unique_ptr has 16 bytes + // on 64-bit systems or when sizes_and_strides_ is 84 vs 88 bytes + // on 32-bit systems you get a cookie! + // Length | LLVM | GCC | C++ | CUDA + // 192 | ? | 11.2 | 201703 | 11040 + // 208 | ? | 11.2 | 201703 | 11040 + // 208 | ? | 11.2 | 201402 | 11040 + // 192 | ? | 11.2 | 201402 | 11040 + // 160 | 12 | 4.2 | 201703 | 0 + // + // To keep things clean, we split on systems here. + +#if UINTPTR_MAX == 0xFFFFFFFF + // This is a 32-bit system + static constexpr bool check_sizes() { + constexpr size_t tsize = 20 * sizeof(int64_t); + + // clang-format off + are_equal(); + are_equal(); + are_equal(); + are_equal(); + are_equal(); + is_le(); + are_equal(); + are_equal(); + are_equal(); + are_equal(); + are_equal(); + is_le(); + // clang-format on + + return true; + } +#else + // This is a 64-bit system + static constexpr bool check_sizes() { + constexpr size_t tsize = 26 * sizeof(int64_t); + + // clang-format off + are_equal(); + // On some systems involving NVCC the size of unique_ptr is 16 bytes. We haven't + // figured out how to detect those via macro preprocessors yet, so we use <= + // comparisons for the relevant fields. + is_le(); + is_le(); + are_equal(); + are_equal(); + are_equal(); + are_equal(); + are_equal(); + are_equal(); + are_equal(); + are_equal(); + is_le(); + // clang-format on + + return true; + } +#endif +}; + +// We use a class to encapsulate size-checking logic with +// templates to capture sizes and flags. We call this within +// a static assert to prove there is no run-time behaviour. +// Since the methods we call return either true or fail their +// own static_asserts, we should never see the error messages +// below. We have to provide it though for c++ <17. 
+static_assert( + C10_TensorImpl_Size_Check_Dummy_Class<>::check_sizes(), + "You should not see this message."); + +// Clean up after ourselves +#undef C10_NVCC +#undef C10_CUDA_VERSION_MAJOR +#undef C10_CUDA_VERSION +#undef C10_CLANG_MAJOR_VERSION +#undef C10_GCC_VERSION +#undef C10_GCC_VERSION_MINOR + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/TensorOptions.h b/phivenv/Lib/site-packages/torch/include/c10/core/TensorOptions.h new file mode 100644 index 0000000000000000000000000000000000000000..facd2acb924a4e4c1212917f7e880248fb96b538 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/TensorOptions.h @@ -0,0 +1,782 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace c10 { + +inline ScalarType dtype_or_default(std::optional dtype) { + return dtype.value_or(get_default_dtype_as_scalartype()); +} + +inline caffe2::TypeMeta dtype_or_default( + std::optional dtype) { + return dtype.value_or(get_default_dtype()); +} + +inline Layout layout_or_default(std::optional layout) { + return layout.value_or(kStrided); +} + +inline Device device_or_default(std::optional device) { + return device.value_or(Device(kCPU)); +} + +inline bool pinned_memory_or_default(std::optional pinned_memory) { + return pinned_memory.value_or(false); +} + +/// A class to encapsulate construction axes of an Tensor. TensorOptions was +/// designed to support the Python style API for specifying construction options +/// on factory functions, e.g., +/// +/// torch.zeros(2, 3, dtype=torch.int32) +/// +/// Because C++ doesn't natively support keyword arguments, there must be +/// another way of specifying keyword-like arguments. TensorOptions is a +/// builder class which can be used to construct this "dictionary" of keyword +/// arguments: functions which support TensorOptions conventionally take this +/// argument optionally as their last argument. +/// +/// WARNING: In PyTorch, there are `torch::` variants of factory functions, +/// e.g., torch::zeros for at::zeros. These return Variables (while the +/// stock ATen functions return plain Tensors). If you mix these functions +/// up, you WILL BE SAD. +/// +/// Rather than use the constructor of this class directly, you should prefer to +/// use the constructor functions, and then chain setter methods on top of them. +/// +/// at::device(at::kCUDA).dtype(kInt) +/// at::dtype(at::kInt) +/// +/// Additionally, anywhere a TensorOptions is expected, you can directly +/// pass at::kCUDA / at::kInt, and it will implicitly convert to a +/// TensorOptions. +/// +/// Here are some recommended ways to create a 2x2 tensor of zeros +/// with certain properties. These all *implicitly* make use of +/// TensorOptions, even if they don't mention the class explicitly: +/// +/// at::zeros({2,2}, at::kCUDA); +/// at::zeros({2,2}, at::kLong); +/// at::zeros({2,2}, at::device(at::kCUDA).dtype(at::kLong())); +/// at::zeros({2,2}, at::device({at::kCUDA, 1})); // place on device 1 +/// at::zeros({2,2}, at::requires_grad()); +/// + +/// NOTE [ TensorOptions Constructors ] +/// +/// TensorOptions is like a dictionary with entries from the set: +/// {requires_grad, device, dtype, layout}, where each entry may be +/// unspecified (i.e., is optional). 
It is used to specify the properties of +/// tensors in many places both in C++ internal and API, e.g., tensor factory +/// methods like `at::empty({10}, options)`, tensor conversions like +/// `tensor.to(...)`, etc. +/// +/// To provide a simple API that is consistent with Python, where one can do +/// `torch.empty(sizes, X)` with `X` being a `torch.device`, `torch.dtype`, or a +/// `torch.layout`, we want TensorOptions to be implicitly convertible from +/// `ScalarType dtype`, `Layout layout` and `Device device`. Therefore, we have +/// three implicit constructors from each of these three types. +/// +/// This is sufficient for `ScalarType` and `Layout` as they are simple Enum +/// classes. However, `Device` is an ordinary class with implicit constructors +/// `Device(DeviceType, DeviceIndex = -1)` and `Device(std::string)` to be +/// consistent with Python API, where strings are treated as equivalent with a +/// `torch.device` object (e.g., "cuda:1" can be passed to everywhere a +/// `torch.device("cuda:1")` is accepted). To support the syntax +/// `at::empty({10}, {kCUDA, 1})` and `tensor.to(kCUDA)`, we need to make sure +/// that `TensorOptions` is implicitly constructible with any arguments that a +/// `Device` can constructed from. So we have, +/// +/// /* implicit */ TensorOptions(T&& device) : TensorOptions() { +/// this->set_device(device); +/// } +/// +/// template ::value>> +/// /* implicit */ TensorOptions(Args&&... args) +/// : TensorOptions(Device(std::forward(args)...)) {} +/// +/// +/// But this will be problematic. Consider this: `TensorOptions({kCUDA, 1})`. +/// Compiler will complain about ambiguity between the copy constructor and the +/// `Device` constructor because `{kCUDA, 1}` can be converted to both a +/// `TensorOption` and a `Device`. +/// +/// To get around this, we templatize the `Device` constructor. Since overload +/// resolution is done before template resolution, our problem is solved. + +DispatchKey computeDispatchKey( + std::optional dtype, + std::optional layout, + std::optional device); + +struct C10_API TensorOptions { + TensorOptions() + : requires_grad_(false), + pinned_memory_(false), + has_device_(false), + has_dtype_(false), + has_layout_(false), + has_requires_grad_(false), + has_pinned_memory_(false), + has_memory_format_(false) {} + + /// Constructs a `TensorOptions` object with the given layout. + /* implicit */ TensorOptions(Layout layout) : TensorOptions() { + this->set_layout(layout); + } + + /// Constructs a `TensorOptions` object with the given device. + /// See NOTE [ TensorOptions Constructors ] on why this is templatized. + template < + typename T, + typename = std::enable_if_t, Device>>> + /* implicit */ TensorOptions(T&& device) : TensorOptions() { + this->set_device(std::forward(device)); + } + + /// Constructs a `TensorOptions` object from arguments allowed in `Device` + /// constructors. + /// + /// See NOTE [ TensorOptions Constructors ]. + /// + /// NB: Ideally we only allow implicit constructors here. But there is no easy + /// way to detect them. So we have this one that allows explicit + /// constructors too. + template < + typename... Args, + typename = std::enable_if_t>> + /* implicit */ TensorOptions(Args&&... args) + : TensorOptions(Device(std::forward(args)...)) {} + + /// Constructs a `TensorOptions` object with the given dtype. 
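+  /// As a sketch, passing `caffe2::TypeMeta::Make<float>()` here behaves like
+  /// `TensorOptions().dtype(caffe2::TypeMeta::Make<float>())`.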
+ /* implicit */ TensorOptions(caffe2::TypeMeta dtype) : TensorOptions() { + this->set_dtype(dtype); + } + + /// legacy constructor to support ScalarType + /* implicit */ TensorOptions(ScalarType dtype) : TensorOptions() { + this->set_dtype(dtype); + } + + /// Constructs a `TensorOptions` object with the given memory format. + /* implicit */ TensorOptions(MemoryFormat memory_format) : TensorOptions() { + set_memory_format(memory_format); + } + + /// Return a copy of `TensorOptions` with `device` set to the given one, or + /// cleared if `device` is `nullopt`. + [[nodiscard]] TensorOptions device( + std::optional device) const noexcept { + TensorOptions r = *this; + r.set_device(device); + return r; + } + + /// Return a copy of `TensorOptions` with `device` set to the given one. + /// (This overload ensures that variadic template std::optional constructor + /// for Device work correctly.) + template + [[nodiscard]] TensorOptions device(Args&&... args) const noexcept { + return device( + std::optional(std::in_place, std::forward(args)...)); + } + + /// Return a copy of `TensorOptions`, but with device set to CUDA, and the + /// device index set to the given one. + /// + /// TODO: This function encourages bad behavior (assuming CUDA is + /// the only device that matters). Get rid of it / rename it. + [[nodiscard]] TensorOptions device_index( + c10::DeviceIndex device_index) const noexcept { + return device(Device::Type::CUDA, device_index); + } + + /// Return a copy of `TensorOptions` with `dtype` set to the given one. + [[nodiscard]] TensorOptions dtype( + std::optional dtype) const noexcept { + TensorOptions r = *this; + r.set_dtype(dtype); + return r; + } + + // legacy function to support ScalarType + [[nodiscard]] TensorOptions dtype( + std::optional dtype) const noexcept { + TensorOptions r = *this; + r.set_dtype(dtype); + return r; + } + + // Since dtype is taken... + template + TensorOptions& dtype() { + dtype_ = caffe2::TypeMeta::Make(); + has_dtype_ = true; + return *this; + } + + /// Sets the layout of the `TensorOptions`. + [[nodiscard]] TensorOptions layout( + std::optional layout) const noexcept { + TensorOptions r = *this; + r.set_layout(layout); + return r; + } + + /// Sets the `requires_grad` property of the `TensorOptions`. + [[nodiscard]] TensorOptions requires_grad( + std::optional requires_grad) const noexcept { + TensorOptions r = *this; + r.set_requires_grad(requires_grad); + return r; + } + + /// Sets the `pinned_memory` property on the `TensorOptions`. + [[nodiscard]] TensorOptions pinned_memory( + std::optional pinned_memory) const noexcept { + TensorOptions r = *this; + r.set_pinned_memory(pinned_memory); + return r; + } + + /// Sets the `memory_format` property on `TensorOptions`. + [[nodiscard]] TensorOptions memory_format( + std::optional memory_format) const noexcept { + TensorOptions r = *this; + r.set_memory_format(memory_format); + return r; + } + + /// Returns the device of the `TensorOptions`. + Device device() const noexcept { + return device_or_default(device_opt()); + } + + /// Returns whether the device is specified. + bool has_device() const noexcept { + return has_device_; + } + + /// Returns the device of the `TensorOptions`, or `std::nullopt` if + /// device is not specified. + std::optional device_opt() const noexcept { + return has_device_ ? std::make_optional(device_) : std::nullopt; + } + + /// Returns the device index of the `TensorOptions`. 
+ c10::DeviceIndex device_index() const noexcept { + return device().index(); + } + + /// Returns the dtype of the `TensorOptions`. + caffe2::TypeMeta dtype() const noexcept { + return dtype_or_default(dtype_opt()); + } + + /// Returns whether the dtype is specified. + bool has_dtype() const noexcept { + return has_dtype_; + } + + /// Returns the dtype of the `TensorOptions`, or `std::nullopt` if + /// device is not specified. + std::optional dtype_opt() const noexcept { + return has_dtype_ ? std::make_optional(dtype_) : std::nullopt; + } + + /// Returns the layout of the `TensorOptions`. + Layout layout() const noexcept { + return layout_or_default(layout_opt()); + } + + /// Returns whether the layout is specified. + bool has_layout() const noexcept { + return has_layout_; + } + + /// Returns the layout of the `TensorOptions`, or `std::nullopt` if + /// layout is not specified. + std::optional layout_opt() const noexcept { + return has_layout_ ? std::make_optional(layout_) : std::nullopt; + } + + /// Returns the `requires_grad` property of the `TensorOptions`. + bool requires_grad() const noexcept { + return has_requires_grad_ ? requires_grad_ : false; + } + + /// Returns whether the `requires_grad` is specified. + bool has_requires_grad() const noexcept { + return has_requires_grad_; + } + + /// Returns the `requires_grad` property of the `TensorOptions`, or + /// `std::nullopt` if `requires_grad` is not specified. + std::optional requires_grad_opt() const noexcept { + return has_requires_grad_ ? std::make_optional(requires_grad_) + : std::nullopt; + } + + /// Returns the `pinned_memory` property of the `TensorOptions`. + bool pinned_memory() const noexcept { + return pinned_memory_or_default(pinned_memory_opt()); + } + + /// Returns whether the `pinned_memory` is specified. + bool has_pinned_memory() const noexcept { + return has_pinned_memory_; + } + + /// Returns if the layout is sparse + bool is_sparse() const { + return layout_ == c10::Layout::Sparse; + } + + /// Returns if the layout is sparse CSR, deprecated, use + /// is_sparse_compressed() instead + bool is_sparse_csr() const { + return layout_ == c10::Layout::SparseCsr; + } + + bool is_sparse_compressed() const { + return layout_ == c10::Layout::SparseCsr || + layout_ == c10::Layout::SparseCsc || + layout_ == c10::Layout::SparseBsr || layout_ == c10::Layout::SparseBsc; + } + + // For compatibility with legacy tensor.type() comparisons + bool type_equal(const TensorOptions& other) const { + return computeDispatchKey() == other.computeDispatchKey() && + typeMetaToScalarType(dtype_) == typeMetaToScalarType(other.dtype()); + } + + /// Returns the `pinned_memory` property of the `TensorOptions`, or + /// `std::nullopt` if `pinned_memory` is not specified. + std::optional pinned_memory_opt() const noexcept { + return has_pinned_memory_ ? std::make_optional(pinned_memory_) + : std::nullopt; + } + + /// Returns whether the `memory_layout` is specified + bool has_memory_format() const noexcept { + return has_memory_format_; + } + + // NB: memory_format() getter is PURPOSELY not defined, as the default + // behavior of memory_format varies from function to function. + + /// Returns the `memory_layout` property of `TensorOptions, or + /// `std::nullopt` if `memory_format` is not specified. + std::optional memory_format_opt() const noexcept { + return has_memory_format_ ? std::make_optional(memory_format_) + : std::nullopt; + } + + // Resolves the ATen backend specified by the current construction axes. 
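+  // For example, strided options on a CUDA device compute DispatchKey::CUDA,
+  // which maps to Backend::CUDA.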
+ // TODO: Deprecate this + Backend backend() const { + return at::dispatchKeyToBackend(computeDispatchKey()); + } + + /// Return the right-biased merge of two TensorOptions. This has the + /// effect of overwriting settings from self with specified options + /// of options. + /// + /// NB: This merging operation does NOT respect device merges. + /// For example, if you device({kCUDA, 1}).merge_in(kCUDA) + /// you will get kCUDA in the end! Functions like Tensor.new_empty + /// ensure the right device is selected anyway by way of a + /// device guard. + /// + TensorOptions merge_in(TensorOptions options) const noexcept { + TensorOptions merged = *this; + if (options.has_device()) + merged.set_device(options.device_opt()); + if (options.has_dtype()) + merged.set_dtype(options.dtype_opt()); + if (options.has_layout()) + merged.set_layout(options.layout_opt()); + // NB: requires grad is right biased; not a logical AND/OR! + if (options.has_requires_grad()) + merged.set_requires_grad(options.requires_grad_opt()); + if (options.has_pinned_memory()) + merged.set_pinned_memory(options.pinned_memory_opt()); + if (options.has_memory_format()) + merged.set_memory_format(options.memory_format_opt()); + return merged; + } + + // TODO remove after TensorOptions rationalization + TensorOptions merge_memory_format( + std::optional optional_memory_format) const noexcept { + TensorOptions merged = *this; + if (optional_memory_format.has_value()) { + merged.set_memory_format(optional_memory_format); + } + return merged; + } + + // INVARIANT: computeDispatchKey returns only the subset of dispatch keys for + // which dispatchKeyToBackend is injective, if it is defined at all (for + // the most part, this just means that this function never returns an + // Autograd key) + DispatchKey computeDispatchKey() const { + return c10::computeDispatchKey( + optTypeMetaToScalarType(dtype_opt()), layout_opt(), device_opt()); + } + + private: + // These methods are currently private because I'm not sure if it's wise + // to actually publish them. They are methods because I need them in + // the constructor and the functional API implementation. + // + // If you really, really need it, you can make these public, but check if you + // couldn't just do what you need with the functional API. Similarly, these + // methods are not chainable, because if you wanted chaining, you probably + // want to use the functional API instead. (It's probably OK to make + // these chainable, because these functions are all explicitly annotated + // with a ref-qualifier, the trailing &, that makes them illegal to call + // on temporaries.) + + /// Mutably set the device of `TensorOptions`. + void set_device(std::optional device) & noexcept { + if (device) { + device_ = *device; + has_device_ = true; + } else { + has_device_ = false; + } + } + + /// Mutably set the dtype of `TensorOptions`. + void set_dtype(std::optional dtype) & noexcept { + if (dtype) { + dtype_ = *dtype; + has_dtype_ = true; + } else { + has_dtype_ = false; + } + } + + // legacy function to support ScalarType + void set_dtype(std::optional dtype) & noexcept { + if (dtype) { + dtype_ = scalarTypeToTypeMeta(*dtype); + has_dtype_ = true; + } else { + has_dtype_ = false; + } + } + + /// Mutably set the layout of `TensorOptions`. + void set_layout(std::optional layout) & noexcept { + if (layout) { + layout_ = *layout; + has_layout_ = true; + } else { + has_layout_ = false; + } + } + + /// Mutably set the `requires_grad` property of `TensorOptions`. 
+ void set_requires_grad(std::optional requires_grad) & noexcept { + if (requires_grad) { + requires_grad_ = *requires_grad; + has_requires_grad_ = true; + } else { + has_requires_grad_ = false; + } + } + + /// Mutably set the `pinned_memory` property of `TensorOptions`. + void set_pinned_memory(std::optional pinned_memory) & noexcept { + if (pinned_memory) { + pinned_memory_ = *pinned_memory; + has_pinned_memory_ = true; + } else { + has_pinned_memory_ = false; + } + } + + /// Mutably set the `memory_Format` property of `TensorOptions`. + void set_memory_format(std::optional memory_format) & noexcept { + if (memory_format) { + memory_format_ = *memory_format; + has_memory_format_ = true; + } else { + has_memory_format_ = false; + } + } + + // WARNING: If you edit TensorOptions to add more options, you + // may need to adjust the implementation of Tensor::options. + // The criteria for whether or not Tensor::options must be adjusted + // is whether or not the new option you added should preserved + // by functions such as empty_like(); if it should be preserved, + // you must adjust options(). + // + // TODO: MemoryFormat is not implemented in this way + + // NB: We didn't use std::optional here, because then we can't pack + // the has_***_ boolean fields. + + Device device_ = at::kCPU; // 16-bit + caffe2::TypeMeta dtype_ = caffe2::TypeMeta::Make(); // 16-bit + Layout layout_ = at::kStrided; // 8-bit + MemoryFormat memory_format_ = MemoryFormat::Contiguous; // 8-bit + + // Bitmask required here to get this to fit inside 32 bits (or even 64 bits, + // for that matter) + + bool requires_grad_ : 1; + bool pinned_memory_ : 1; + + bool has_device_ : 1; + bool has_dtype_ : 1; + bool has_layout_ : 1; + bool has_requires_grad_ : 1; + bool has_pinned_memory_ : 1; + bool has_memory_format_ : 1; +}; + +// We should aspire to fit in one machine-size word; but a size greater than two +// words is too much. (We are doing terribly on 32-bit archs, where we require +// three machine size words to store tensor options. Eek!) +static_assert( + sizeof(TensorOptions) <= sizeof(int64_t) * 2, + "TensorOptions must fit in 128-bits"); + +/// Convenience function that returns a `TensorOptions` object with the `dtype` +/// set to the given one. +inline TensorOptions dtype(caffe2::TypeMeta dtype) { + return TensorOptions().dtype(dtype); +} + +// legacy function to support ScalarType +inline TensorOptions dtype(ScalarType dtype) { + return TensorOptions().dtype(scalarTypeToTypeMeta(dtype)); +} + +/// Convenience function that returns a `TensorOptions` object with the `layout` +/// set to the given one. +inline TensorOptions layout(Layout layout) { + return TensorOptions().layout(layout); +} + +/// Convenience function that returns a `TensorOptions` object with the `device` +/// set to the given one. +inline TensorOptions device(Device device) { + return TensorOptions().device(device); +} + +/// Convenience function that returns a `TensorOptions` object with the +/// `device` set to CUDA and the `device_index` set to the given one. +inline TensorOptions device_index(c10::DeviceIndex device_index) { + return TensorOptions().device_index(device_index); +} + +/// Convenience function that returns a `TensorOptions` object with the +/// `requires_grad` set to the given one. +inline TensorOptions requires_grad(bool requires_grad = true) { + return TensorOptions().requires_grad(requires_grad); +} + +/// Convenience function that returns a `TensorOptions` object with the +/// `memory_format` set to the given one. 
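+/// These helpers compose, e.g. (sketch):
+/// `device(kCUDA).memory_format(MemoryFormat::ChannelsLast)`.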
+inline TensorOptions memory_format(MemoryFormat memory_format) { + return TensorOptions().memory_format(memory_format); +} + +C10_API std::ostream& operator<<( + std::ostream& stream, + const TensorOptions& options); + +template +inline TensorOptions dtype() { + return dtype(caffe2::TypeMeta::Make()); +} + +inline std::string toString(const TensorOptions& options) { + std::ostringstream stream; + stream << options; + return stream.str(); +} + +// This is intended to be a centralized location by which we can determine +// what an appropriate DispatchKey for a tensor is. +inline DispatchKey computeDispatchKey( + std::optional dtype, + std::optional layout, + std::optional device) { + const auto layout_ = layout_or_default(layout); + const auto device_ = device_or_default(device); + switch (layout_) { + case Layout::Jagged: + case Layout::Strided: { + const auto dtype_ = dtype_or_default(dtype); + switch (device_.type()) { +#define DO_CASE(device, _) \ + case c10::DeviceType::device: { \ + if (isQIntType(dtype_)) { \ + return DispatchKey::Quantized##device; \ + } \ + return DispatchKey::device; \ + } + C10_FORALL_BACKEND_DEVICE_TYPES(DO_CASE, unused) +#undef DO_CASE + case c10::DeviceType::FPGA: + return DispatchKey::FPGA; + case c10::DeviceType::MAIA: + return DispatchKey::MAIA; + case c10::DeviceType::Vulkan: + return DispatchKey::Vulkan; + case c10::DeviceType::Metal: + return DispatchKey::Metal; + case c10::DeviceType::MKLDNN: + case c10::DeviceType::OPENGL: + case c10::DeviceType::OPENCL: + case c10::DeviceType::IDEEP: + TORCH_INTERNAL_ASSERT( + 0, + "This is a grandfathered Caffe2 device type ", + device_.type(), + ", it shouldn't ever convert to a DispatchKey. File a bug describing what you were doing if you think this is in error."); + default: + TORCH_CHECK_NOT_IMPLEMENTED( + false, + "Unsupported device type for dense layout: ", + device_.type()); + } + } + case Layout::Sparse: + switch (device_.type()) { +#define DO_CASE(device, _) \ + case c10::DeviceType::device: { \ + return DispatchKey::Sparse##device; \ + } + C10_FORALL_BACKEND_DEVICE_TYPES(DO_CASE, unused) +#undef DO_CASE + default: + TORCH_CHECK_NOT_IMPLEMENTED( + false, + "Unsupported device type for sparse layout: ", + device_.type()); + } + case Layout::Mkldnn: + switch (device_.type()) { + case c10::DeviceType::CPU: + return DispatchKey::MkldnnCPU; + default: + TORCH_CHECK_NOT_IMPLEMENTED( + false, + "Unsupported device type for mkldnn layout: ", + device_.type()); + } + case Layout::SparseCsr: + case Layout::SparseCsc: + case Layout::SparseBsr: + case Layout::SparseBsc: + switch (device_.type()) { +#define DO_CASE(device, _) \ + case c10::DeviceType::device: { \ + return DispatchKey::SparseCsr##device; \ + } + C10_FORALL_BACKEND_DEVICE_TYPES(DO_CASE, unused) +#undef DO_CASE + default: + TORCH_CHECK_NOT_IMPLEMENTED( + false, + "Unsupported device type for ", + layout_, + " layout: ", + device_.type()); + } + default: + TORCH_CHECK(false, "Unsupported layout: ", layout_); + } +} + +inline Layout dispatchKeyToLayout(DispatchKey dispatch_key) { + switch (dispatch_key) { +#define DO_CASE(bc, _) case DispatchKey::Sparse##bc: + C10_FORALL_BACKEND_COMPONENTS(DO_CASE, unused) +#undef DO_CASE + return Layout::Sparse; +#define DO_CASE(bc, _) case DispatchKey::SparseCsr##bc: + C10_FORALL_BACKEND_COMPONENTS(DO_CASE, unused) +#undef DO_CASE + TORCH_CHECK( + false, "Cannot map DispatchKey ", dispatch_key, " to a unique layout."); + case DispatchKey::MkldnnCPU: + return Layout::Mkldnn; + default: + return Layout::Strided; + } +} + 
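+// A minimal usage sketch of the mapping implemented above; the helper name
+// `example_dispatch_key_roundtrip_sketch` is hypothetical and added here only
+// for illustration, it is not part of the upstream API.
+inline void example_dispatch_key_roundtrip_sketch() {
+  // Options for a strided float tensor on CPU.
+  TensorOptions opts = TensorOptions()
+                           .dtype(caffe2::TypeMeta::Make<float>())
+                           .layout(kStrided)
+                           .device(Device(kCPU));
+  // A dense (strided) layout on CPU resolves to DispatchKey::CPU ...
+  DispatchKey key = opts.computeDispatchKey();
+  // ... and mapping the key back recovers the strided layout.
+  Layout layout = dispatchKeyToLayout(key);
+  (void)layout;
+}
+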
+inline c10::DeviceType dispatchKeyToDeviceType(DispatchKey dispatch_key) { + switch (dispatch_key) { + // stuff that's real +#define DO_CASE(suffix, prefix) \ + case DispatchKey::prefix##suffix: \ + return c10::DeviceType::suffix; +#define DO_CASES(_, prefix) C10_FORALL_BACKEND_DEVICE_TYPES(DO_CASE, prefix) + C10_FORALL_FUNCTIONALITY_KEYS(DO_CASES) +#undef DO_CASES +#undef DO_CASE + + case DispatchKey::MkldnnCPU: + return c10::DeviceType::CPU; + case DispatchKey::Vulkan: + return c10::DeviceType::Vulkan; + + case DispatchKey::MAIA: + return c10::DeviceType::MAIA; + default: + TORCH_CHECK( + false, + "DispatchKey ", + dispatch_key, + " doesn't correspond to a device"); + } +} + +inline TensorOptions dispatchKeyToTensorOptions(DispatchKey dispatch_key) { + return TensorOptions() + .layout(dispatchKeyToLayout(dispatch_key)) + .device(dispatchKeyToDeviceType(dispatch_key)); +} + +namespace detail { +inline bool backend_supports_empty_operator(const TensorOptions& options) { + // Quantized backends don't support at::empty(). + // They have separate operators like at::empty_quantized() that take in + // extra information about how to quantize the tensor. + return !isQIntType(typeMetaToScalarType(options.dtype())); +} + +} // namespace detail + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/UndefinedTensorImpl.h b/phivenv/Lib/site-packages/torch/include/c10/core/UndefinedTensorImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..5d38e2e8f893573749c73aedba732d6ee5e65b57 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/UndefinedTensorImpl.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace c10 { + +struct C10_API UndefinedTensorImpl final : public TensorImpl { + public: + // Without this, we get: + // error: identifier "at::UndefinedTensorImpl::_singleton" is undefined in + // device code + // (ostensibly because the constexpr tricks MSVC into trying to compile this + // function for device as well). 
+#ifdef _WIN32 + static inline TensorImpl* singleton() { + return &getInstance(); + } +#else + static constexpr inline TensorImpl* singleton() { + return &_singleton; + } +#endif + +#ifdef DEBUG + bool has_storage() const override; +#endif + void set_storage_offset(int64_t offset) override; + + protected: + bool is_contiguous_custom(MemoryFormat format) const override; + IntArrayRef strides_custom() const override; + SymIntArrayRef sym_strides_custom() const override; + + private: + UndefinedTensorImpl(); +#ifdef _WIN32 + static UndefinedTensorImpl& getInstance(); +#else + static UndefinedTensorImpl _singleton; +#endif + const char* tensorimpl_type_name() const override; +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/WrapDimMinimal.h b/phivenv/Lib/site-packages/torch/include/c10/core/WrapDimMinimal.h new file mode 100644 index 0000000000000000000000000000000000000000..cc3b0d3267171a60c5725d8d9185772a2b4601ea --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/WrapDimMinimal.h @@ -0,0 +1,48 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace c10 { + +namespace detail { +// This template can only be specialized at int64_t and c10::SymInt; +// you'll get linker errors otherwise +template +C10_API T maybe_wrap_dim_slow(T dim, T dim_post_expr, bool wrap_scalar); +} // namespace detail + +template +T _maybe_wrap_dim(T dim, T dim_post_expr, bool wrap_scalar = true) { + // Inline the fast paths + if (C10_LIKELY(dim_post_expr * -1 <= dim && dim < dim_post_expr)) { + // For SymInts, we want an explicit control flow to trigger a guard, so we + // may as well branch too. + if (dim < 0) { + return dim + dim_post_expr; + } + return dim; + } + // Check edge-cases out-of-line (wrapping scalars and out-of-bounds errors) + return c10::detail::maybe_wrap_dim_slow( + std::move(dim), std::move(dim_post_expr), wrap_scalar); +} + +inline int64_t maybe_wrap_dim( + int64_t dim, + int64_t dim_post_expr, + bool wrap_scalar = true) { + return _maybe_wrap_dim(dim, dim_post_expr, wrap_scalar); +} + +inline c10::SymInt maybe_wrap_dim( + c10::SymInt dim, + c10::SymInt dim_post_expr, + bool wrap_scalar = true) { + return _maybe_wrap_dim(std::move(dim), std::move(dim_post_expr), wrap_scalar); +} + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/alignment.h b/phivenv/Lib/site-packages/torch/include/c10/core/alignment.h new file mode 100644 index 0000000000000000000000000000000000000000..32cac40eb982d97808989aa6245ac4081c6eb824 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/alignment.h @@ -0,0 +1,21 @@ +#pragma once + +#include + +namespace c10 { + +#ifdef C10_MOBILE +// Use 16-byte alignment on mobile +// - ARM NEON AArch32 and AArch64 +// - x86[-64] < AVX +constexpr size_t gAlignment = 16; +#else +// Use 64-byte alignment should be enough for computation up to AVX512. 
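+// (A 64-byte boundary matches both the widest AVX-512 load/store and the
+// x86-64 cache line size.)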
+constexpr size_t gAlignment = 64; +#endif + +constexpr size_t gPagesize = 4096; +// since the default thp pagesize is 2MB, enable thp only +// for buffers of size 2MB or larger to avoid memory bloating +constexpr size_t gAlloc_threshold_thp = static_cast(2) * 1024 * 1024; +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/impl/COW.h b/phivenv/Lib/site-packages/torch/include/c10/core/impl/COW.h new file mode 100644 index 0000000000000000000000000000000000000000..503a819d526816252c49f44eb03d3016a68d17d5 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/impl/COW.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include + +namespace c10 { +struct StorageImpl; +class DataPtr; +} // namespace c10 + +namespace c10::impl::cow { + +// Creates a Copy-on-write (COW) clone of the given storage. This will also +// convert the given storage into a COW storage if it is not COW already. +// +// Converting the storage into a COW storage will not be successful if the +// storage's DataPtr has some context (`DataPtr::get_context()`) which is not +// equal to the data pointer (`DataPtr::get()`). In this case, a nullptr is +// returned. +C10_API c10::intrusive_ptr lazy_clone_storage( + StorageImpl& storage); + +// Check if a storage has a simple DataPtr with no abnormal context +C10_API bool has_simple_data_ptr(const c10::StorageImpl& storage); + +// Check if a DataPtr is COW +C10_API bool is_cow_data_ptr(const c10::DataPtr& data_ptr); + +// Eagerly copies a COW storage's data, turning it into a non-COW storage. +C10_API void materialize_cow_storage(StorageImpl& storage); + +} // namespace c10::impl::cow diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/impl/COWDeleter.h b/phivenv/Lib/site-packages/torch/include/c10/core/impl/COWDeleter.h new file mode 100644 index 0000000000000000000000000000000000000000..58378c4ec2e3b9826cd7cb4cc11ab610662b3b16 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/impl/COWDeleter.h @@ -0,0 +1,66 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +namespace c10::impl::cow { + +// A COWDeleterContext object is used as the `ctx` argument for DataPtr +// to implement a Copy-on-write (COW) DataPtr. +class C10_API COWDeleterContext { + public: + // Creates an instance, holding the pair of data and original + // deleter. + // + // Note that the deleter will only be called in our destructor if + // the last reference to this goes away without getting + // materialized. + explicit COWDeleterContext(std::unique_ptr data); + + // Increments the current refcount. + void increment_refcount(); + + // See README.md in this directory to understand the locking + // strategy. + + // Represents a reference to the context. + // + // This is returned by decrement_refcount to allow the caller to + // copy the data under the shared lock. + using NotLastReference = std::shared_lock; + + // Represents the last reference to the context. + // + // This will be returned by decrement_refcount when it is the last + // reference remaining and after any pending copies have completed. + using LastReference = std::unique_ptr; + + // Decrements the refcount, returning a handle indicating what to + // do with it. + std::variant decrement_refcount(); + + private: + // The destructor is hidden, this should only ever be used within + // UniqueVoidPtr using cow::delete_context as the deleter. 
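+  // (cow_deleter below decrements the refcount and deletes the context once
+  // the last reference goes away.)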
+  ~COWDeleterContext();
+
+  std::shared_mutex mutex_;
+  std::unique_ptr<void, DeleterFnPtr> data_;
+  std::atomic<int64_t> refcount_ = 1;
+};
+
+// `cow_deleter` is used as the `ctx_deleter` for DataPtr to implement a COW
+// DataPtr.
+//
+// Warning: This should only be called on a pointer to a COWDeleterContext that
+// was allocated on the heap with `new`, because when the refcount reaches 0,
+// the context is deleted with `delete`.
+C10_API void cow_deleter(void* ctx);
+
+} // namespace c10::impl::cow
diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/impl/DeviceGuardImplInterface.h b/phivenv/Lib/site-packages/torch/include/c10/core/impl/DeviceGuardImplInterface.h
new file mode 100644
index 0000000000000000000000000000000000000000..3b124f094f3aacbb2ca0b91d63ca2f1132a55a7d
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/c10/core/impl/DeviceGuardImplInterface.h
@@ -0,0 +1,375 @@
+#pragma once
+
+#include <c10/core/Device.h>
+#include <c10/core/DeviceType.h>
+#include <c10/core/Stream.h>
+#include <c10/util/Exception.h>
+
+// Just for C10_ANONYMOUS_VARIABLE
+#include <c10/util/Registry.h>
+
+#include <array>
+#include <atomic>
+
+namespace c10 {
+
+// Forward declaration
+class DataPtr;
+
+/**
+ * Note [Flags defining the behavior of events]
+ *
+ * PYTORCH_DEFAULT and BACKEND_DEFAULT are valid for all backends. The
+ * BACKEND_DEFAULT is what a particular backend would select if no
+ * flags were given. PYTORCH_DEFAULT is PyTorch's framework-default
+ * choice for events on that backend, which may not be the same.
+ *
+ * The mapping of PYTORCH_DEFAULT and BACKEND_DEFAULT is done by each
+ * backend implementation.
+ */
+enum class EventFlag {
+  // Disable timing
+  PYTORCH_DEFAULT,
+  // Enable timing
+  BACKEND_DEFAULT,
+  // FOR TESTING ONLY
+  INVALID
+};
+
+namespace impl {
+
+/**
+ * DeviceGuardImplInterface represents the virtual interface which provides
+ * the functionality needed to implement an RAII class for device and stream
+ * switching, via DeviceGuard. Every distinct device type, e.g., CUDA and
+ * HIP, is expected to implement and register an implementation of this
+ * interface. All classes which inherit from DeviceGuardImplInterface should
+ * be declared 'final'.
+ *
+ * This class exists because we provide a unified interface for performing
+ * device guards via DeviceGuard, but we cannot assume that we have actually
+ * compiled against the, e.g., CUDA library, which actually implements
+ * this guard functionality. In this case, a dynamic dispatch is required
+ * to cross the library boundary.
+ *
+ * If possible, you should directly use implementations of this interface;
+ * those uses will be devirtualized.
+ */
+struct C10_API DeviceGuardImplInterface {
+  DeviceGuardImplInterface() = default;
+  DeviceGuardImplInterface(const DeviceGuardImplInterface&) = default;
+  DeviceGuardImplInterface& operator=(const DeviceGuardImplInterface&) =
+      default;
+  DeviceGuardImplInterface(DeviceGuardImplInterface&&) noexcept = default;
+  DeviceGuardImplInterface& operator=(DeviceGuardImplInterface&&) noexcept =
+      default;
+
+  /**
+   * Return the type of device managed by this guard implementation.
+   */
+  virtual DeviceType type() const = 0;
+
+  /**
+   * Set the current device to Device, and return the previous Device.
+   */
+  virtual Device exchangeDevice(Device) const = 0;
+  // NB: Implementations of exchangeDevice can be a bit boilerplatey.
+  // You might consider replacing exchangeDevice with a non-virtual function
+  // with a baked-in implementation; however, note that this will triple the
+  // number of virtual calls (when you implement exchangeDevice in a final
+  // subclass, the compiler gets to devirtualize everything; it won't do that
+  // if you don't define it in the subclass!) A common way to solve this
+  // problem is to use some sort of CRTP; however, we can't template
+  // DeviceGuardImplInterface since we really *do* need it to be virtual. A
+  // little boilerplate seems easiest to explain. (Another way around this
+  // problem is to provide inline functions that provide the default
+  // implementations, but this seems a little hard to explain. In any case,
+  // we're only going to have on the order of ten implementations of this
+  // anyway.)
+
+  /**
+   * Get the current device.
+   */
+  virtual Device getDevice() const = 0;
+
+  /**
+   * Set the current device to Device.
+   */
+  virtual void setDevice(Device) const = 0;
+
+  /**
+   * Set the current device to Device, without checking for errors
+   * (so, e.g., this can be called from a destructor).
+   */
+  virtual void uncheckedSetDevice(Device) const noexcept = 0;
+
+  /**
+   * Get the current stream for a given device.
+   */
+  virtual Stream getStream(Device) const = 0;
+
+  /**
+   * Get the default stream for a given device.
+   */
+  virtual Stream getDefaultStream(Device) const {
+    TORCH_CHECK(false, "Backend doesn't support acquiring a default stream.")
+  }
+
+  /**
+   * Get a stream from the global pool for a given device.
+   */
+  virtual Stream getStreamFromGlobalPool(Device, bool isHighPriority = false)
+      const {
+    (void)isHighPriority; // Suppress unused variable warning
+    TORCH_CHECK(false, "Backend doesn't support acquiring a stream from pool.")
+  }
+
+  /**
+   * Return a new stream for a given device and priority. The stream will be
+   * copied and shared around; the device backend should be able to correctly
+   * handle the lifetime of the stream.
+   */
+  virtual Stream getNewStream(Device, int priority = 0) const {
+    (void)priority;
+    TORCH_CHECK(false, "Backend doesn't support creating a new Stream.")
+  }
+
+  /**
+   * Set a stream to be the thread local current stream for its device.
+   * Return the previous stream for that device. You are NOT required
+   * to set the current device to match the device of this stream.
+   */
+  virtual Stream exchangeStream(Stream) const = 0;
+
+  /**
+   * Destroys the given event.
+   */
+  virtual void destroyEvent(void* /*event*/, const DeviceIndex /*device_index*/)
+      const noexcept {}
+
+  /**
+   * Increments the event's version and enqueues a job with this version
+   * in the stream's work queue. When the stream processes that job,
+   * it notifies all streams waiting on / blocked by that version of the
+   * event to continue, and marks that version as recorded.
+   */
+  virtual void record(
+      void** /*event*/,
+      const Stream& /*stream*/,
+      const DeviceIndex /*device_index*/,
+      const c10::EventFlag /*flag*/) const {
+    TORCH_CHECK(false, "Backend doesn't support events.");
+  }
+
+  /**
+   * Does nothing if the event has not been scheduled to be recorded.
+   * If the event was previously enqueued to be recorded, a command
+   * to wait for the version of the event that exists at the time of this call
+   * is inserted in the stream's work queue.
+   * When the stream reaches this command it will stop processing
+   * additional commands until that version of the event is marked as recorded.
+ */ + virtual void block(void* /*event*/, const Stream& /*stream*/) const { + TORCH_CHECK(false, "Backend doesn't support events."); + } + + /** + * Returns true if (and only if) + * (1) the event has never been scheduled to be recorded + * (2) the current version is marked as recorded. + * Returns false otherwise. + */ + virtual bool queryEvent(void* /*event*/) const { + TORCH_CHECK(false, "Backend doesn't support events."); + } + + /** + * Get the number of devices. WARNING: This is REQUIRED to not raise + * an exception. If there is some sort of problem, e.g., driver error, + * you should report that there are zero available devices. + */ + virtual DeviceIndex deviceCount() const noexcept = 0; + + /** + * Return true if all the work previously enqueued on the stream for + * asynchronous execution has completed running on the device. + */ + virtual bool queryStream(const Stream& /*stream*/) const { + TORCH_CHECK(false, "Backend doesn't support querying streams."); + } + + /** + * Wait (by blocking the calling thread) until all the work previously + * enqueued on the stream has completed running on the device. + */ + virtual void synchronizeStream(const Stream& /*stream*/) const { + TORCH_CHECK(false, "Backend doesn't support synchronizing streams."); + } + + /** + * Wait (by blocking the calling thread) until all the work previously + * recorded on the event has completed running on the device. + */ + virtual void synchronizeEvent(void* /*event*/) const { + TORCH_CHECK(false, "Backend doesn't support synchronizing events."); + } + + /** + * Wait (by blocking the calling thread) until all the work previously + * enqueued on the device has been completed. + */ + virtual void synchronizeDevice(const DeviceIndex /*device_index*/) const { + TORCH_CHECK( + false, "Backend doesn't support synchronizing all streams on device."); + } + + /** + * Ensure the caching allocator (if any) is aware that the given DataPtr is + * being used on the given stream, and that it should thus avoid recycling the + * DataPtr until all work on that stream is done. + */ + virtual void recordDataPtrOnStream(const c10::DataPtr&, const Stream&) const { + } + + /** + * Fetch the elapsed time between two recorded events. + */ + virtual double elapsedTime( + void* /*event1*/, + void* /*event2*/, + const DeviceIndex /*device_index*/) const { + TORCH_CHECK(false, "Backend doesn't support elapsedTime."); + } + + /** + * Intended use of this class is to leak the DeviceGuardImpl at program end. + * So you better not call the destructor, buster! + */ + virtual ~DeviceGuardImplInterface() = default; +}; + +// A no-op device guard impl that doesn't do anything interesting. Useful +// for devices that don't actually have a concept of device index. Prominent +// examples are CPU and Meta. 
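+//
+// For instance, a backend without meaningful device indices might register
+// itself roughly like this (an illustrative sketch, not an excerpt from
+// PyTorch):
+//
+//   C10_REGISTER_GUARD_IMPL(Meta, NoOpDeviceGuardImpl<DeviceType::Meta>);
+//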
+template +struct NoOpDeviceGuardImpl final : public DeviceGuardImplInterface { + NoOpDeviceGuardImpl() = default; + DeviceType type() const override { + return D; + } + Device exchangeDevice(Device) const override { + return Device(D, -1); // no-op + } + Device getDevice() const override { + return Device(D, -1); + } + void setDevice(Device) const override { + // no-op + } + void uncheckedSetDevice(Device) const noexcept override { + // no-op + } + Stream getStream(Device) const noexcept override { + // no-op + return Stream(Stream::DEFAULT, Device(D, -1)); + } + + Stream getNewStream(Device, int priority = 0) const override { + // no-op + (void)priority; + return Stream(Stream::DEFAULT, Device(D, -1)); + } + + // NB: These do NOT set the current device + Stream exchangeStream(Stream) const noexcept override { + // no-op + return Stream(Stream::DEFAULT, Device(D, -1)); + } + DeviceIndex deviceCount() const noexcept override { + return 1; + } + + // Event-related functions + void record( + void** /*event*/, + const Stream& /*stream*/, + const DeviceIndex /*device_index*/, + const EventFlag /*flag*/) const override { + TORCH_CHECK(false, D, " backend doesn't support events."); + } + void block(void* /*event*/, const Stream& /*stream*/) const override { + TORCH_CHECK(false, D, " backend doesn't support events.") + } + bool queryEvent(void* /*event*/) const override { + TORCH_CHECK(false, D, " backend doesn't support events.") + } + void destroyEvent(void* /*event*/, const DeviceIndex /*device_index*/) + const noexcept override {} + + // Stream-related functions + bool queryStream(const Stream& /*stream*/) const override { + return true; + } + void synchronizeStream(const Stream& /*stream*/) const override { + // Don't wait for anything. + } +}; + +// The registry is NON-owning. Each stored pointer is std::atomic so +// that under all interleavings of registry calls the structure is +// race-free. This doesn't cost us anything on reads in X86. (An +// unsynchronized implementation probably is OK too, but I didn't want +// to prove that we never read from device_guard_impl_registry at the +// same time some registration is occurring. Shiver.) +// +// I'd like this registry to be valid even at program destruction time +// (in case someone uses a DeviceGuard in a destructor to do some cleanup +// in the CUDA API.) Since there are no direct accesses of the underlying +// owning objects which I can use to enforce initialization order (unlike +// in a Meyer singleton), it implies that you must *leak* objects when +// putting them in the registry. This is done by deleting the destructor +// on DeviceGuardImplInterface. +extern C10_API std::array< + std::atomic, + static_cast(DeviceType::COMPILE_TIME_MAX_DEVICE_TYPES)> + device_guard_impl_registry; + +// I can't conveniently use c10/util/Registry.h for the following reason: +// c10/util/Registry.h gives me a slow way of Create'ing a object of some +// interface from the registry, but no way of quickly accessing an already +// created object. I'll be banging on getDeviceGuardImpl every time we do a +// DeviceGuard, so I really don't want to be doing an unordered_map lookup. +// Better if the registration mechanism directly drops its implementation +// into device_guard_impl_registry. 
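+//
+// With that in place, the hot-path lookup below reduces to an array index
+// plus an atomic load, e.g. (illustrative):
+//
+//   const DeviceGuardImplInterface* impl =
+//       getDeviceGuardImpl(DeviceType::CUDA);
+//   Device prev = impl->exchangeDevice(Device(DeviceType::CUDA, 0));
+//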
+
+class C10_API DeviceGuardImplRegistrar {
+ public:
+  DeviceGuardImplRegistrar(DeviceType, const DeviceGuardImplInterface*);
+};
+
+#define C10_REGISTER_GUARD_IMPL(DevType, DeviceGuardImpl)              \
+  static ::c10::impl::DeviceGuardImplRegistrar C10_ANONYMOUS_VARIABLE( \
+      g_##DeviceType)(::c10::DeviceType::DevType, new DeviceGuardImpl());
+
+inline const DeviceGuardImplInterface* getDeviceGuardImpl(DeviceType type) {
+  // Two adjacent int16_t fields DeviceType and DeviceIndex have their field
+  // accesses miscompiled on NVCC. To work around this issue, we apply a mask
+  // to the DeviceType. First, check that DeviceType is still 8-bit.
+  // FB employees can see
+  // https://fb.workplace.com/groups/llvm.gcc/permalink/4053565044692080/
+  // for more details
+  static_assert(sizeof(DeviceType) == 1, "DeviceType is not 8-bit");
+  auto p = device_guard_impl_registry[static_cast<size_t>(type) & 0xFF].load();
+
+  // This seems to be the first place where you make use of a device
+  // when you pass devices to factory functions. Give a nicer error
+  // message in this case.
+  TORCH_CHECK(p, "PyTorch is not linked with support for ", type, " devices");
+  return p;
+}
+
+inline bool hasDeviceGuardImpl(DeviceType type) {
+  return device_guard_impl_registry[static_cast<size_t>(type)].load();
+}
+
+} // namespace impl
+} // namespace c10
diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/impl/FakeGuardImpl.h b/phivenv/Lib/site-packages/torch/include/c10/core/impl/FakeGuardImpl.h
new file mode 100644
index 0000000000000000000000000000000000000000..d2660514c49ac13ac4de019b66d5b958557c988d
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/c10/core/impl/FakeGuardImpl.h
@@ -0,0 +1,102 @@
+#pragma once
+
+#include <c10/core/impl/DeviceGuardImplInterface.h>
+
+#include <array>
+
+namespace c10::impl {
+
+// FakeGuardImpl is hardcoded to have eight devices. Not for
+// any good reason, just to simplify code.
+constexpr DeviceIndex kFakeGuardImplMaxDevices = 8;
+
+/**
+ * A fake implementation of DeviceGuardImplInterface suitable for testing.
+ * The current device is modeled as a mutable field in the guard implementation
+ * class. See DeviceGuard_test.cpp for an example use.
+ */ +template +struct FakeGuardImpl final : public DeviceGuardImplInterface { + static constexpr DeviceType static_type = T; + // Runtime device type is not used + FakeGuardImpl(DeviceType) {} + FakeGuardImpl() = default; + DeviceType type() const override { + return T; + } + Device exchangeDevice(Device d) const override { + AT_ASSERT(d.type() == type()); + AT_ASSERT(d.index() < kFakeGuardImplMaxDevices); + Device old_device = getDevice(); + if (old_device.index() != d.index()) { + current_device_ = d.index(); + } + return old_device; + } + Device getDevice() const override { + return Device(type(), current_device_); + } + void setDevice(Device d) const override { + AT_ASSERT(d.type() == type()); + AT_ASSERT(d.index() >= 0); + AT_ASSERT(d.index() < kFakeGuardImplMaxDevices); + current_device_ = d.index(); + } + void uncheckedSetDevice(Device d) const noexcept override { + current_device_ = d.index(); + } + Stream getStream(Device d) const noexcept override { + return Stream(Stream::UNSAFE, d, current_streams_[d.index()]); + } + Stream exchangeStream(Stream s) const noexcept override { + auto old_id = current_streams_[s.device_index()]; + current_streams_[s.device_index()] = s.id(); + return Stream(Stream::UNSAFE, s.device(), old_id); + } + DeviceIndex deviceCount() const noexcept override { + return kFakeGuardImplMaxDevices; + } + + // Event-related functions + void record( + void** /*event*/, + const Stream& /*stream*/, + const DeviceIndex /*device_index*/, + const EventFlag /*flag*/) const override {} + void block(void* /*event*/, const Stream& /*stream*/) const override {} + bool queryEvent(void* /*event*/) const override { + return true; + } + void destroyEvent(void* /*event*/, const DeviceIndex /*device_index*/) + const noexcept override {} + + // Convenience methods for testing + static DeviceIndex getDeviceIndex() { + return current_device_; + } + static void setDeviceIndex(DeviceIndex i) { + AT_ASSERT(i >= 0); + AT_ASSERT(i < kFakeGuardImplMaxDevices); + current_device_ = i; + } + static StreamId getCurrentStreamIdFor(DeviceIndex i) { + return current_streams_.at(i); + } + static void resetStreams() { + current_streams_.fill(0); + } + + private: + thread_local static DeviceIndex current_device_; + thread_local static std::array + current_streams_; +}; + +template +thread_local DeviceIndex FakeGuardImpl::current_device_ = 0; + +template +thread_local std::array + FakeGuardImpl::current_streams_ = {0, 0, 0, 0, 0, 0, 0, 0}; + +} // namespace c10::impl diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/impl/GPUTrace.h b/phivenv/Lib/site-packages/torch/include/c10/core/impl/GPUTrace.h new file mode 100644 index 0000000000000000000000000000000000000000..9101b1b29c34a8b3cf61c4f0b759066a804febf5 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/impl/GPUTrace.h @@ -0,0 +1,28 @@ +#pragma once + +#include + +namespace c10::impl { + +struct C10_API GPUTrace { + // On the x86 architecture the atomic operations are lock-less. + static std::atomic gpuTraceState; + + // When PyTorch migrates to C++20, this should be changed to an atomic flag. + // Currently, the access to this variable is not synchronized, on the basis + // that it will only be flipped once and by the first interpreter that + // accesses it. + static bool haveState; + + // This function will only register the first interpreter that tries to invoke + // it. For all of the next ones it will be a no-op. 
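+  //
+  // Expected call pattern (an illustrative sketch; `interpreter` stands in
+  // for whatever PyInterpreter* the caller holds):
+  //
+  //   GPUTrace::set_trace(interpreter);            // first caller wins
+  //   if (const PyInterpreter* t = GPUTrace::get_trace()) {
+  //     // forward trace events through t
+  //   }
+  //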
+ static void set_trace(const PyInterpreter*); + + static const PyInterpreter* get_trace() { + if (!haveState) + return nullptr; + return gpuTraceState.load(std::memory_order_acquire); + } +}; + +} // namespace c10::impl diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/impl/HermeticPyObjectTLS.h b/phivenv/Lib/site-packages/torch/include/c10/core/impl/HermeticPyObjectTLS.h new file mode 100644 index 0000000000000000000000000000000000000000..a12eccc0a4432c275aa7d294c8968d463243636b --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/impl/HermeticPyObjectTLS.h @@ -0,0 +1,62 @@ +#pragma once + +#include +#include + +namespace c10::impl { + +// This TLS controls whether or not we permanently associate PyObject +// with Tensor the first time it is allocated. When hermetic PyObject +// TLS is enabled (state is true), we DO NOT save PyObjects to Tensor, +// meaning you get a distinct PyObject whenever you execute the code in +// question. +struct C10_API HermeticPyObjectTLS { + static void set_state(bool state); + static bool get_state() { + // Hypothetical fastpath if torchdeploy/multipy // codespell:ignore multipy + // isn't used. Per + // https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2055r0.pdf + // this qualifies relaxed access because it is a single-location data + // structure (only the boolean here). + // + // Forgetting about data races for a moment, is there a logical race? + // + // - Boolean only ever transitions from false to true. So the + // critical situation is when one interpreter is already running + // when a second interpreter switches haveState from false to true. + // + // - The first interpreter is indifferent whether or not it sees + // hasState true/false; obviously false works (this is what the + // interpreter was previously using; more directly, the interpreter + // calls into itself as the handler, so being hermetic is not + // required), and true simply means serviced python operator calls will + // be hermetic; in these cases it is expected to be functionally + // equivalent. + // + // - The second interpreter MUST see hasState true (as its requests will + // be forwarded to the first interpreter), but it is assumed that there + // is a synchronization between the interpreter initialization, and + // when we actually perform operations, so it is guaranteed to see + // hasState true. + // + // QED. + // + // This fastpath is currently disabled so that we can more easily test that + // hermetic mode works correctly even on stock build of PyTorch. + if (false && !haveState_.load(std::memory_order_relaxed)) + return false; + return get_tls_state(); + } + // Call this from the multipy/torchdeploy // codespell:ignore multipy + // top level + static void init_state(); + + private: + // This only flipped once from false to true during + // torchdeploy/multipy initialization, // codespell:ignore multipy + // and never again. + static std::atomic haveState_; + static bool get_tls_state(); +}; + +} // namespace c10::impl diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/impl/InlineDeviceGuard.h b/phivenv/Lib/site-packages/torch/include/c10/core/impl/InlineDeviceGuard.h new file mode 100644 index 0000000000000000000000000000000000000000..cfab17b0fcbebcb12172b1a81114efc480716a8b --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/impl/InlineDeviceGuard.h @@ -0,0 +1,433 @@ +#pragma once + +// This file provides implementations of InlineDeviceGuard and +// InlineOptionalDeviceGuard. 
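+//
+// As a quick orientation (an illustrative sketch; MyBackendGuardImpl is a
+// hypothetical DeviceGuardImplInterface implementation):
+//
+//   using MyGuard = c10::impl::InlineDeviceGuard<MyBackendGuardImpl>;
+//   MyGuard g(device); // switches to `device`, restores the old one on exit
+//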
+ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace c10::impl { + +/** + * A DeviceGuard is an RAII class that sets a device to some value + * on construction, and resets the device to its original value on + * destruction. + * + * InlineDeviceGuard is a helper class for implementing DeviceGuards. + * It is templated over a DeviceGuardImpl (anything that implements + * DeviceGuardImplInterface). There are two primary ways to instantiate + * InlineDeviceGuard: + * + * - With a concrete implementation of DeviceGuardImpl, e.g., CUDAGuardImpl. + * This is the best way to use InlineDeviceGuard, as all calls are + * devirtualized, giving you code as efficient as straight line + * calls to cudaGetDevice/cudaSetDevice. + * + * - With VirtualGuardImpl, which does a virtual dispatch to a DeviceGuardImpl + * retrieved from a DeviceType registry. We have explicitly instantiated + * InlineDeviceGuard this way as c10::DeviceGuard. + * + * If you are in a hurry, you can use InlineDeviceGuard directly: + * + * using CUDAGuard = impl::InlineDeviceGuard; + * + * However, you can provide a better user experience if you explicitly write a + * wrapper class that itself contains the template instantiation: + * + * class CUDAGuard { + * public: + * // ... the API ... + * private: + * impl::InlineDeviceGuard guard_; + * } + * + * The wrapper class provides a good place to write documentation, and helps + * avoid weird template instantiation errors when a user incorrectly uses the + * class. + * + * If you need to test this class, consider instantiating it with FakeGuardImpl. + */ +template +class InlineDeviceGuard { + public: + // Note [Omitted default constructor from RAII] + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // In principle, we could add a default constructor to + // DeviceGuard which reads the current device and promises to + // restore to that device on exit. However, most cases where you + // would have written this, you probably meant to actually just + // use DeviceGuard (since you don't actually need the + // restore to happen if you don't ever actually set the device). + // We remove the constructor here to encourage you to think about + // what you actually want to happen. + explicit InlineDeviceGuard() = delete; + + /// Set the current device to the passed Device. + explicit InlineDeviceGuard(Device device) + : impl_(device.type()), + original_device_( + device.index() == -1 ? impl_.getDevice() + : impl_.exchangeDevice(device)), + current_device_(device.index() == -1 ? original_device_ : device) {} + + /// Set the current device index to the passed DeviceIndex. (The + /// device type is inferred from the template parameter T). + template < + typename U = T, + typename = + typename std::enable_if_t>> + explicit InlineDeviceGuard(DeviceIndex device_index) + : InlineDeviceGuard(Device(U::static_type, device_index)) {} + + /// Construct an InlineDeviceGuard using VirtualGuardImpl with an explicit + /// DeviceGuardImplInterface pointer. + template < + typename U = T, + typename = typename std::enable_if_t>> + explicit InlineDeviceGuard( + Device device, + const DeviceGuardImplInterface* impl) + : impl_( + VirtualGuardImpl(impl ? impl : getDeviceGuardImpl(device.type()))), + original_device_( + device.index() == -1 ? impl_.getDevice() + : impl_.exchangeDevice(device)), + current_device_(device.index() == -1 ? 
original_device_ : device) {} + + /// Copy is disallowed + InlineDeviceGuard(const InlineDeviceGuard&) = delete; + InlineDeviceGuard& operator=(const InlineDeviceGuard&) = delete; + + /// Move is disallowed, as DeviceGuard does not have an uninitialized state, + /// which is required for moves on types with nontrivial destructors. + InlineDeviceGuard(InlineDeviceGuard&& other) = delete; + InlineDeviceGuard& operator=(InlineDeviceGuard&& other) = delete; + + ~InlineDeviceGuard() { + impl_.uncheckedSetDevice(original_device_); + } + + /// Sets the device to the given one. + template < + typename U = T, + typename std::enable_if_t, int> = 0> + void set_device(at::Device device) { + AT_ASSERT( + (U::static_type == DeviceType::HIP && device.is_cuda()) || + device.type() == U::static_type); + auto index = device.index(); + if (index == -1) + return; + impl_.setDevice(device); + current_device_ = device; + } + + /// Resets the currently set device to its original device, and then sets the + /// current device to the passed device. This is effectively equivalent to + /// set_device when a guard supports only a single device type. + template + typename std::enable_if_t> reset_device( + at::Device device) { + set_device(device); + } + + /// Resets the currently set device to its original device, and then sets the + /// current device to the passed device (for a possibly different device + /// type). + /// + /// This method is named reset_device to highlight the fact that previous + /// device settings from this guard are NOT preserved, even if the device + /// has a different device type. For example: + /// + /// // CUDA device is 0 + /// DeviceGuard g(Device(kCUDA, 1)); + /// g.reset_device(Device(kHIP, 2)); + /// // CUDA device is 0 (!!) + /// + /// NOTE: this implementation may skip some device setting if it can prove + /// that it is unnecessary. + /// + /// Optional argument is for testing only. + template + typename std::enable_if_t> reset_device( + at::Device device, + const impl::DeviceGuardImplInterface* impl = nullptr) { + auto index = device.index(); + if (index == -1) + return; + if (device.type() == original_device_.type()) { + AT_ASSERT(impl == nullptr || impl->type() == device.type()); + impl_.setDevice(device); + current_device_ = device; + } else { + // Destruct and reconstruct the DeviceGuard in place + impl_.setDevice(original_device_); + impl_ = !impl ? VirtualGuardImpl(device.type()) : VirtualGuardImpl(impl); + original_device_ = impl_.exchangeDevice(device); + current_device_ = device; + } + } + + /// Sets the device index to the given one. The device type is inferred + /// from the original device type. + void set_index(DeviceIndex index) { + reset_device(Device(original_device_.type(), index)); + } + + /// Returns the device that was set at the time the most recent + /// reset_device(), or otherwise the device at construction time. + Device original_device() const { + return original_device_; + } + + /// Returns the most recent device that was set using this device guard, + /// either from construction, or via set_device/reset_device/set_index. + Device current_device() const { + return current_device_; + } + + protected: + T impl_; + + private: + Device original_device_; + Device current_device_; +}; + +/** + * A OptionalDeviceGuard is an RAII class that sets a device to some value on + * initialization, and resets the device to its original value on destruction. + * + * InlineOptionalDeviceGuard is a helper class for implementing + * OptionalDeviceGuards. 
See guidance in InlineDeviceGuard on how to + * use this. See OptionalDeviceGuard for user-oriented usage notes. + */ +template +class InlineOptionalDeviceGuard { + public: + // Note [Explicit initialization of optional fields] + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // Explicit initialization of optional fields + // required to workaround an nvcc bug; see + // https://github.com/pytorch/pytorch/issues/12117 + + /// Creates an uninitialized OptionalDeviceGuard. + explicit InlineOptionalDeviceGuard() + : guard_() // See Note [Explicit initialization of optional fields] + {} + ~InlineOptionalDeviceGuard() = default; + + /// Set the current device to the passed Device, if it is not nullopt. + explicit InlineOptionalDeviceGuard(std::optional device_opt) + : guard_() { // See Note [Explicit initialization of optional fields] + if (device_opt.has_value()) { + guard_.emplace(device_opt.value()); + } + } + + /// Set the current device to the passed DeviceIndex, if it is not nullopt. + template < + typename U = T, + typename = + typename std::enable_if_t>> + explicit InlineOptionalDeviceGuard( + std::optional device_index_opt) + : guard_() { // See Note [Explicit initialization of optional fields] + if (device_index_opt.has_value()) { + guard_.emplace(device_index_opt.value()); + } + } + + /// All constructors of DeviceGuard are valid for OptionalDeviceGuard + /// and result in initialized OptionalDeviceGuard. + template + explicit InlineOptionalDeviceGuard(Args&&... args) + : guard_(std::in_place, std::forward(args)...) {} + + // TODO: Consider reading Tensor and TensorList constructors here, when + // Tensor moves to c10. (These are only valid on OptionalDeviceGuard, + // because a Tensor may be undefined, in which case we need an uninitialized + // tensor guard.) + + // Note [Move construction for RAII guards is tricky] + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // In principle, move construction is useful for terminating + // the lifetime of a `OptionalDeviceGuard` early; for example: + // + // // current device is d0 + // OptionalDeviceGuard g1(d1); + // // current device is d1 + // { + // OptionalDeviceGuard g2(std::move(g1)); + // } + // // current device is d0!! + // + // However, it's difficult to implement the move constructor + // in a way that works in all situations. For example, consider + // the following example: + // + // OptionalDeviceGuard g1(d1); + // { + // OptionalDeviceGuard g2(d2); + // { + // OptionalDeviceGuard g3(std::move(g1)); // !!! + // } + // } + // + // What should the current device be while g3 in scope... and what + // should it be after it goes out of scope? What about g2? + // There don't seem to be satisfactory answers for these questions. + // + // It's in principle possible to raise an error when this occurs + // by doing some extra thread-local bookkeeping. But why bother? + // Just don't provide the constructor. + InlineOptionalDeviceGuard(const InlineOptionalDeviceGuard& other) = delete; + InlineOptionalDeviceGuard(InlineOptionalDeviceGuard&& other) = delete; + + // Note [Move assignment for RAII guards is tricky] + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // Move assignment is deleted, because you need to know which guard was + // defined "first", as that guard's original_device_ wins--with the current + // representation, we have no way of telling which is the case. (Move + // construction does not have this problem, as one guard is always + // uninitialized.) 
+ // + // We can make this clear by way of a pair of examples: + // + // Example 1: + // + // // initial device is n0 + // { + // CUDAGuard g1(n1); + // { + // CUDAGuard g2(n2); + // // current device should be n2 + // g1 = std::move(g2); + // // current device should still be n2 + // } + // // current device should still be n2 + // } + // // current device should be n0 + // + // Example 2 (flip the order of the two guards): + // + // // initial device is n0 + // { + // CUDAGuard g2(n2); + // { + // CUDAGuard g1(n1); + // // current device should be n1 + // g1 = std::move(g2); + // // current device should be n2 + // } + // // current device should be n0 (since g2 has been vacated) + // } + // + // In both examples, we need g1 to restore to n0 after move assignment. + // However, in example 1, this is determined by the restore value of g1 + // (prior to the move). In example 2, however, it is determined by the the + // restore value of g2(!!). We don't know which one should win, without having + // a way of telling which guard was allocated first. + // + // We could solve this with an extra thread-local variable. But no one is + // actually using move-assignment. So just get rid of it. + InlineOptionalDeviceGuard& operator=(const InlineOptionalDeviceGuard& other) = + delete; + InlineOptionalDeviceGuard& operator=(InlineOptionalDeviceGuard&& other) = + delete; + + /// Sets the device to the given one. Initializes OptionalDeviceGuard if it + /// is not already initialized. + template < + typename U = T, + typename = + typename std::enable_if_t>> + void set_device(at::Device device) { + if (!guard_.has_value()) { + guard_.emplace(device); + } else { + guard_->set_device(device); + } + } + + /// Resets the currently set device to its original device, and then sets the + /// current device to the passed device (for a possibly different device + /// type). Initializes OptionalDeviceGuard if it is not already initialized. + /// + /// See notes on why this is called reset_device on InlineDeviceGuard. + /// + /// Optional argument is for testing only. + template < + typename U = T, + typename = typename std::enable_if_t>> + void reset_device( + at::Device device, + const DeviceGuardImplInterface* impl = nullptr) { + if (!guard_.has_value()) { + guard_.emplace(device, impl); + } else { + guard_->reset_device(device, impl); + } + } + + /// Resets the currently set device to its original device, and then sets the + /// current device to the passed device. Initializes the guard if it is + /// not already initialized. This is effectively equivalent to set_device + /// when a guard supports only a single device type. + template < + typename U = T, + typename = + typename std::enable_if_t>> + void reset_device(at::Device device) { + if (!guard_.has_value()) { + guard_.emplace(device); + } else { + guard_->reset_device(device); + } + } + + /// Sets the device index to the given one. The device type is statically + /// known. + template < + typename U = T, + typename = + typename std::enable_if_t>> + void set_index(DeviceIndex index) { + if (!guard_.has_value()) { + guard_.emplace(index); + } else { + guard_->set_index(index); + } + } + + /// Returns the device that was set immediately prior to initialization of + /// the, guard, or nullopt if the guard is uninitialized. + std::optional original_device() const { + return guard_.has_value() ? 
std::make_optional(guard_->original_device()) + : std::nullopt; + } + + /// Returns the most recent device that was set using this device guard, + /// either from construction, or via set_device, if the guard is initialized, + /// or nullopt if the guard is uninitialized. + std::optional current_device() const { + return guard_.has_value() ? std::make_optional(guard_->current_device()) + : std::nullopt; + } + + /// Restore the original device, resetting this guard to uninitialized state. + void reset() { + guard_.reset(); + } + + private: + std::optional> guard_; +}; + +} // namespace c10::impl diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/impl/InlineEvent.h b/phivenv/Lib/site-packages/torch/include/c10/core/impl/InlineEvent.h new file mode 100644 index 0000000000000000000000000000000000000000..7d7c2b44f8945112615a72c9493f82188ced5ccc --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/impl/InlineEvent.h @@ -0,0 +1,147 @@ +#pragma once + +#include +#include +#include +#include + +namespace c10::impl { + +template +struct InlineEvent final { + InlineEvent() = delete; + InlineEvent( + const DeviceType _device_type, + const EventFlag _flag = EventFlag::PYTORCH_DEFAULT) + : backend_{_device_type}, device_type_{_device_type}, flag_{_flag} {} + + // Copy constructor and copy assignment operator (deleted) + InlineEvent(const InlineEvent&) = delete; + InlineEvent& operator=(const InlineEvent&) = delete; + + // Move constructor and move assignment operator + InlineEvent(InlineEvent&& other) noexcept + : event_(other.event_), + backend_(std::move(other.backend_)), + device_type_(other.device_type_), + device_index_(other.device_index_), + flag_(other.flag_), + was_marked_for_recording_(other.was_marked_for_recording_) { + other.event_ = nullptr; + } + InlineEvent& operator=(InlineEvent&& other) noexcept { + swap(other); + return *this; + } + + void swap(InlineEvent& other) noexcept { + std::swap(event_, other.event_); + std::swap(backend_, other.backend_); + std::swap(device_type_, other.device_type_); + std::swap(device_index_, other.device_index_); + std::swap(flag_, other.flag_); + std::swap(was_marked_for_recording_, other.was_marked_for_recording_); + } + + ~InlineEvent() noexcept { + if (event_) + backend_.destroyEvent(event_, device_index_); + } + + DeviceType device_type() const noexcept { + return device_type_; + } + DeviceIndex device_index() const noexcept { + return device_index_; + } + EventFlag flag() const noexcept { + return flag_; + } + bool was_marked_for_recording() const noexcept { + return was_marked_for_recording_; + } + + void recordOnce(const Stream& stream) { + if (!was_marked_for_recording_) + record(stream); + } + + void record(const Stream& stream) { + TORCH_CHECK( + stream.device_type() == device_type_, + "Event device type ", + DeviceTypeName(device_type_), + " does not match recording stream's device type ", + DeviceTypeName(stream.device_type()), + "."); + + backend_.record(&event_, stream, device_index_, flag_); + was_marked_for_recording_ = true; + device_index_ = stream.device_index(); + } + + void block(const Stream& stream) const { + if (!was_marked_for_recording_) + return; + + TORCH_CHECK( + stream.device_type() == device_type_, + "Event device type ", + DeviceTypeName(device_type_), + " does not match blocking stream's device type ", + DeviceTypeName(stream.device_type()), + "."); + + backend_.block(event_, stream); + } + + bool query() const { + if (!was_marked_for_recording_) + return true; + return 
backend_.queryEvent(event_); + } + + void* eventId() const { + return event_; + } + + double elapsedTime(const InlineEvent& other) const { + TORCH_CHECK( + other.device_type() == device_type_, + "Event device type ", + DeviceTypeName(device_type_), + " does not match other's device type ", + DeviceTypeName(other.device_type()), + "."); + TORCH_CHECK_VALUE( + (flag_ == EventFlag::BACKEND_DEFAULT) && + (other.flag_ == EventFlag::BACKEND_DEFAULT), + "Both events must be created with argument 'enable_timing=True'."); + TORCH_CHECK_VALUE( + was_marked_for_recording() && other.was_marked_for_recording(), + "Both events must be recorded before calculating elapsed time."); + // elapsedTime in MPS can wait event to be completed if event is not ready, + // which is a little different from CUDA + TORCH_CHECK( + (query() && other.query()) || device_type_ == DeviceType::MPS, + "Both events must be completed before calculating elapsed time."); + + return backend_.elapsedTime(event_, other.event_, device_index_); + } + + void synchronize() const { + if (!was_marked_for_recording_) + return; + backend_.synchronizeEvent(event_); + } + + private: + void* event_ = nullptr; + T backend_; + DeviceType device_type_; + DeviceIndex device_index_ = -1; + EventFlag flag_ = EventFlag::PYTORCH_DEFAULT; + bool was_marked_for_recording_ = false; +}; + +} // namespace c10::impl diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/impl/InlineStreamGuard.h b/phivenv/Lib/site-packages/torch/include/c10/core/impl/InlineStreamGuard.h new file mode 100644 index 0000000000000000000000000000000000000000..c830ce9c4cf16e7e893cba1001960bc806b17cd2 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/impl/InlineStreamGuard.h @@ -0,0 +1,260 @@ +#pragma once + +#include +#include +#include + +namespace c10::impl { + +/** + * A StreamGuard is an RAII class that changes the current device + * to the device corresponding to some stream, and changes the + * default stream on that device to be this stream. + * + * InlineStreamGuard is a helper class for implementing StreamGuards. + * See InlineDeviceGuard for guidance on how to use this class. + */ +template +class InlineStreamGuard : private InlineDeviceGuard { + public: + /// No default constructor, see Note [Omitted default constructor from RAII] + explicit InlineStreamGuard() = delete; + + /// Set the current device to the device associated with the passed stream, + /// and set the current stream on that device to the passed stream. + explicit InlineStreamGuard(Stream stream) + : InlineDeviceGuard(stream.device()), + original_stream_of_original_device_( + this->impl_.getStream(original_device())), + original_stream_of_current_device_(this->impl_.exchangeStream(stream)), + current_stream_(stream) {} + + /// This constructor exists purely for testing + template < + typename U = T, + typename = typename std::enable_if_t>> + explicit InlineStreamGuard( + Stream stream, + const DeviceGuardImplInterface* impl) + : InlineDeviceGuard( + stream.device(), + impl ? 
impl : getDeviceGuardImpl(stream.device_type())), + original_stream_of_original_device_( + this->impl_.getStream(original_device())), + original_stream_of_current_device_(this->impl_.exchangeStream(stream)), + current_stream_(stream) {} + + /// Copy is disallowed + InlineStreamGuard(const InlineStreamGuard&) = delete; + InlineStreamGuard& operator=(const InlineStreamGuard&) = delete; + + /// Move is disallowed, as StreamGuard does not have an uninitialized state, + /// which is required for moves on types with nontrivial destructors. + InlineStreamGuard(InlineStreamGuard&& other) = delete; + InlineStreamGuard& operator=(InlineStreamGuard&& other) = delete; + + ~InlineStreamGuard() { + this->impl_.exchangeStream(original_stream_of_current_device_); + } + + /// Resets the currently set stream to the original stream and + /// the currently set device to the original device. Then, + /// set the current device to the device associated with the passed stream, + /// and set the current stream on that device to the passed stream. + /// + /// NOTE: this implementation may skip some stream/device setting if + /// it can prove that it is unnecessary. + /// + /// WARNING: reset_stream does NOT preserve previously set streams on + /// different devices. If you need to set streams on multiple devices + /// use MultiStreamGuard instead. + void reset_stream(Stream stream) { + // TODO: make a version that takes an impl argument. Unfortunately, + // that will require SFINAE because impl is only valid for the + // VirtualGuardImpl specialization. + if (stream.device() == this->current_device()) { + this->impl_.exchangeStream(stream); + current_stream_ = stream; + } else { + // Destruct and reconstruct the StreamGuard in-place + this->impl_.exchangeStream(original_stream_of_current_device_); + this->reset_device(stream.device()); + original_stream_of_current_device_ = this->impl_.exchangeStream(stream); + current_stream_ = stream; + } + } + + // It's not clear if set_device should also reset the current stream + // if the device is unchanged; therefore, we don't provide it. + // The situation is somewhat clearer with reset_device, but it's still + // a pretty weird thing to do, so haven't added this either. + + /// Returns the stream of the original device prior to this guard. Subtly, + /// the stream returned here is the original stream of the *original* + /// device; i.e., it's the stream that your computation *would* have + /// been put on, if it hadn't been for this meddling stream guard. + /// This is usually what you want. + Stream original_stream() const { + return original_stream_of_original_device_; + } + + /// Returns the most recent stream that was set using this device guard, + /// either from construction, or via set_stream. + Stream current_stream() const { + return current_stream_; + } + + /// Returns the most recent device that was set using this device guard, + /// either from construction, or via set_device/reset_device/set_index. + Device current_device() const { + return InlineDeviceGuard::current_device(); + } + + /// Returns the device that was set at the most recent reset_stream(), + /// or otherwise the device at construction time. 
+ Device original_device() const { + return InlineDeviceGuard::original_device(); + } + + private: + Stream + original_stream_of_original_device_; // what the user probably cares about + Stream original_stream_of_current_device_; // what we need to restore + Stream current_stream_; +}; + +/** + * An OptionalStreamGuard is an RAII class that sets a device to some value on + * initialization, and resets the device to its original value on destruction. + * See InlineOptionalDeviceGuard for more guidance on how to use this class. + */ +template +class InlineOptionalStreamGuard { + public: + /// Creates an uninitialized stream guard. + explicit InlineOptionalStreamGuard() + : guard_() // See Note [Explicit initialization of optional fields] + {} + ~InlineOptionalStreamGuard() = default; + + /// Set the current device to the device associated with the passed stream, + /// and set the current stream on that device to the passed stream, + /// if the passed stream is not nullopt. + explicit InlineOptionalStreamGuard(std::optional stream_opt) + : guard_() { + if (stream_opt.has_value()) { + guard_.emplace(stream_opt.value()); + } + } + + /// All constructors of StreamGuard are valid for OptionalStreamGuard + template + explicit InlineOptionalStreamGuard(Args&&... args) + : guard_(std::in_place, std::forward(args)...) {} + + InlineOptionalStreamGuard(const InlineOptionalStreamGuard& other) = delete; + InlineOptionalStreamGuard& operator=(const InlineOptionalStreamGuard& other) = + delete; + // See Note [Move construction for RAII guards is tricky] + InlineOptionalStreamGuard(InlineOptionalStreamGuard&& other) = delete; + + // See Note [Move assignment for RAII guards is tricky] + InlineOptionalStreamGuard& operator=(InlineOptionalStreamGuard&& other) = + delete; + + /// Resets the currently set stream to the original stream and + /// the currently set device to the original device. Then, + /// set the current device to the device associated with the passed stream, + /// and set the current stream on that device to the passed stream. + /// Initializes the OptionalStreamGuard if it was not previously initialized. + void reset_stream(Stream stream) { + if (guard_.has_value()) { + guard_->reset_stream(stream); + } else { + guard_.emplace(stream); + } + } + + /// Returns the stream that was set at the time the guard was most recently + /// initialized, or nullopt if the guard is uninitialized. + std::optional original_stream() const { + return guard_.has_value() ? std::make_optional(guard_->original_stream()) + : std::nullopt; + } + + /// Returns the most recent stream that was set using this stream guard, + /// either from construction, or via reset_stream, if the guard is + /// initialized, or nullopt if the guard is uninitialized. + std::optional current_stream() const { + return guard_.has_value() ? std::make_optional(guard_->current_stream()) + : std::nullopt; + } + + /// Restore the original device and stream, resetting this guard to + /// uninitialized state. + void reset() { + guard_.reset(); + } + + private: + std::optional> guard_; +}; + +template +class InlineMultiStreamGuard { + public: + /// Calls `set_stream` on each of the streams in the list. + /// This may be useful if you need to set different streams + /// for different devices. 
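+  ///
+  /// For example (an illustrative sketch; the streams are assumed to live on
+  /// two different devices of the same device type):
+  ///
+  ///   std::vector<Stream> streams = {stream_on_dev0, stream_on_dev1};
+  ///   InlineMultiStreamGuard<VirtualGuardImpl> guard(streams);
+  ///   // each stream becomes current on its device; all restored on exit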
+ explicit InlineMultiStreamGuard(ArrayRef streams) { + if (!streams.empty()) { + impl_.emplace(getDeviceTypeOfStreams(streams)); + original_streams_.reserve(streams.size()); + for (const Stream& s : streams) { + original_streams_.emplace_back(this->impl_->exchangeStream(s)); + } + } + } + + /// Copy is disallowed + InlineMultiStreamGuard(const InlineMultiStreamGuard&) = delete; + InlineMultiStreamGuard& operator=(const InlineMultiStreamGuard&) = delete; + + /// Move is disallowed, as StreamGuard does not have an uninitialized state, + /// which is required for moves on types with nontrivial destructors. + InlineMultiStreamGuard(InlineMultiStreamGuard&& other) = delete; + InlineMultiStreamGuard& operator=(InlineMultiStreamGuard&& other) = delete; + + ~InlineMultiStreamGuard() noexcept { + if (this->impl_.has_value()) { + for (const Stream& s : original_streams_) { + this->impl_->exchangeStream(s); + } + } + } + + protected: + std::optional impl_; + + private: + /// The original streams that were active on all devices. + std::vector original_streams_; + + static DeviceType getDeviceTypeOfStreams(ArrayRef streams) { + TORCH_INTERNAL_ASSERT(!streams.empty()); + DeviceType type = streams[0].device_type(); + for (const auto idx : c10::irange(1, streams.size())) { + TORCH_CHECK_VALUE( + streams[idx].device_type() == type, + "Streams have a mix of device types: stream 0 is on ", + streams[0].device(), + " while stream ", + idx, + " is on device ", + streams[idx].device()); + } + return type; + } +}; + +} // namespace c10::impl diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/impl/LocalDispatchKeySet.h b/phivenv/Lib/site-packages/torch/include/c10/core/impl/LocalDispatchKeySet.h new file mode 100644 index 0000000000000000000000000000000000000000..9bf4dfad140c7520afb5453f8bda01f5597c51c0 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/impl/LocalDispatchKeySet.h @@ -0,0 +1,169 @@ +#pragma once + +#include +#include + +// TLS management for DispatchKeySet (the "local" DispatchKeySet(s)) +// +// This manages two thread-local DispatchKeySets: +// +// - The included type set, which adds a tensor type for consideration +// in dispatch. (For example, you might add Profiling to +// the included type set to turn on profiling on all tensor operations.) +// +// - The excluded type set, which disqualifies a tensor type from dispatch. +// (For example, after redispatching on variable, we disqualify +// Autograd so we don't attempt to handle variable again.) +// (Exclusion wins over inclusion.) +// +// NB: Originally, I implemented the excluded type set as storing the inverted +// set, but TLS is defined to be zero-initialized, so this doesn't actually work +// (if it's inverted, you want the set to be -1 initialized). + +namespace c10::impl { + +// POD version of LocalDispatchKeySet. Declared here just so that +// we can put it in the guards. +// This struct encapsulates special handling for TLS initialization +// in set_included()/included() API so that they reflect the truth. +// If you want to create PODLocalDispatchKeySet with non-zero state, +// use set_included() instead of default constructor. 
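+//
+// e.g. (an illustrative sketch):
+//
+//   PODLocalDispatchKeySet tls{}; // zero-initialized, as real TLS would be
+//   tls.set_included(tls.included().add(DispatchKey::Python));
+//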
+struct C10_API PODLocalDispatchKeySet { + uint64_t included_; + uint64_t excluded_; + + // See Note [TLS Initialization] + DispatchKeySet included() const { + return DispatchKeySet(DispatchKeySet::RAW, included_) ^ + c10::default_included_set; + } + DispatchKeySet excluded() const { + return DispatchKeySet(DispatchKeySet::RAW, excluded_) ^ + c10::default_excluded_set; + } + + void set_included(DispatchKeySet x) { + included_ = (x ^ c10::default_included_set).raw_repr(); + } + void set_excluded(DispatchKeySet x) { + excluded_ = (x ^ c10::default_excluded_set).raw_repr(); + } +}; +static_assert( + std::is_trivial_v, + "PODLocalDispatchKeySet must be a POD type."); + +struct C10_API LocalDispatchKeySet { + /* implicit */ LocalDispatchKeySet(PODLocalDispatchKeySet x) + : included_(x.included()), excluded_(x.excluded()) {} + DispatchKeySet included_; + DispatchKeySet excluded_; +}; + +// thread_local variables cannot be C10_API on Windows. +// Inlining this seems to break AutoDispatchBelowAutograd on Android. +#if defined(_MSC_VER) || defined(C10_ANDROID) || defined(C10_IPHONE) +C10_API LocalDispatchKeySet tls_local_dispatch_key_set(); +#else // defined(_MSC_VER) || defined(C10_ANDROID) || defined(C10_IPHONE) +extern C10_API thread_local PODLocalDispatchKeySet raw_local_dispatch_key_set; + +inline C10_API LocalDispatchKeySet tls_local_dispatch_key_set() { + // Don't let people fiddle with the thread_local directly just + // because they include this header. + return raw_local_dispatch_key_set; +} +#endif // defined(_MSC_VER) || defined(C10_ANDROID) || defined(C10_IPHONE) + +// Internal, use ThreadLocalStateGuard +C10_API void _force_tls_local_dispatch_key_set(LocalDispatchKeySet key_set); + +// RAII API for manipulating the thread-local dispatch state. 
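+//
+// Typical usage (an illustrative sketch):
+//
+//   {
+//     c10::impl::ExcludeDispatchKeyGuard no_autograd(
+//         c10::DispatchKeySet(c10::DispatchKey::Autograd));
+//     // ops dispatched in this scope skip the excluded keys
+//   } // prior TLS state is restored here
+//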
+ +class C10_API IncludeDispatchKeyGuard { + public: + IncludeDispatchKeyGuard(DispatchKeySet); + IncludeDispatchKeyGuard(DispatchKey k) + : IncludeDispatchKeyGuard(DispatchKeySet(k)) {} + IncludeDispatchKeyGuard(const IncludeDispatchKeyGuard&) = delete; + IncludeDispatchKeyGuard operator=(const IncludeDispatchKeyGuard&) = delete; + IncludeDispatchKeyGuard(IncludeDispatchKeyGuard&&) = delete; + IncludeDispatchKeyGuard operator=(IncludeDispatchKeyGuard&&) = delete; + ~IncludeDispatchKeyGuard(); + + private: + // A little micro-optimization to save us from tls_get_addr call + // on destruction + PODLocalDispatchKeySet* tls_; + DispatchKeySet include_; +}; + +class C10_API ExcludeDispatchKeyGuard { + public: + ExcludeDispatchKeyGuard(DispatchKeySet); + ExcludeDispatchKeyGuard(DispatchKey k) + : ExcludeDispatchKeyGuard(DispatchKeySet(k)) {} + ExcludeDispatchKeyGuard(const ExcludeDispatchKeyGuard&) = delete; + ExcludeDispatchKeyGuard operator=(const ExcludeDispatchKeyGuard&) = delete; + ExcludeDispatchKeyGuard(ExcludeDispatchKeyGuard&&) = delete; + ExcludeDispatchKeyGuard operator=(ExcludeDispatchKeyGuard&&) = delete; + ~ExcludeDispatchKeyGuard(); + + private: + // A little micro-optimization to save us from tls_get_addr call + // on destruction + PODLocalDispatchKeySet* tls_; + DispatchKeySet exclude_; +}; + +struct C10_API ForceDispatchKeyGuard { + public: + ForceDispatchKeyGuard() + : saved_keyset_(c10::impl::tls_local_dispatch_key_set()) {} + ForceDispatchKeyGuard(c10::impl::LocalDispatchKeySet key_set) + : ForceDispatchKeyGuard() { + c10::impl::_force_tls_local_dispatch_key_set(key_set); + } + ForceDispatchKeyGuard( + c10::DispatchKeySet include, + c10::DispatchKeySet exclude) + : ForceDispatchKeyGuard() { + auto updated_set = saved_keyset_; + updated_set.included_ = include; + updated_set.excluded_ = exclude; + c10::impl::_force_tls_local_dispatch_key_set(updated_set); + } + + ForceDispatchKeyGuard(ForceDispatchKeyGuard&&) noexcept = delete; + ForceDispatchKeyGuard(const ForceDispatchKeyGuard&) = delete; + ForceDispatchKeyGuard& operator=(const ForceDispatchKeyGuard&) = delete; + ForceDispatchKeyGuard& operator=(ForceDispatchKeyGuard&&) = delete; + ~ForceDispatchKeyGuard() { + c10::impl::_force_tls_local_dispatch_key_set(saved_keyset_); + } + + private: + c10::impl::LocalDispatchKeySet saved_keyset_; +}; + +// Non-RAII API for manipulating the thread-local dispatch state. +// Please prefer the RAII API. The non-RAII API may be useful when +// the included/excluded state of a given DispatchKey must span +// many calls from the Python to the C++, so you cannot conveniently +// use an RAII guard. +// +// Example use case: a Python context manager that includes a certain +// DispatchKey, to ensure ops running under the context manager dispatch +// through that DispatchKey's registered overrides. +// +// The non-RAII API is less efficient than the RAII guards because both the +// getter and setter will do a tls_getaddr lookup (the RAII struct only needs +// one!) 
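+//
+// For example, such a context manager might call (an illustrative sketch):
+//
+//   tls_set_dispatch_key_included(DispatchKey::Python, true);  // __enter__
+//   // ... run user code under the modified TLS ...
+//   tls_set_dispatch_key_included(DispatchKey::Python, false); // __exit__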
+ +C10_API bool tls_is_dispatch_key_excluded(DispatchKey x); +C10_API void tls_set_dispatch_key_excluded(DispatchKey x, bool desired_state); +C10_API bool tls_is_dispatch_key_included(DispatchKey x); +C10_API void tls_set_dispatch_key_included(DispatchKey x, bool desired_state); +C10_API bool tls_is_dispatch_keyset_excluded(DispatchKeySet ks); +C10_API bool tls_is_dispatch_keyset_included(DispatchKeySet ks); + +} // namespace c10::impl diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/impl/PyInterpreter.h b/phivenv/Lib/site-packages/torch/include/c10/core/impl/PyInterpreter.h new file mode 100644 index 0000000000000000000000000000000000000000..0fc4315dd14d0bceecbff16f21e320163eea1619 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/impl/PyInterpreter.h @@ -0,0 +1,263 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Forward declarations + +namespace c10 { +struct IValue; +class OperatorHandle; +struct TensorImpl; +} // namespace c10 + +namespace torch::jit { +using Stack = std::vector; +} + +// Actual implementation + +namespace c10::impl { + +struct C10_API PyInterpreter; + +// Note [Python interpreter tag] +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +// Traditionally, PyTorch is layered such that our Python library +// (libtorch_python) references our pure C++ library (libtorch) as the +// natural order of things. However, sometimes this natural order is +// subverted: C++ objects refer to Python objects (for example, we +// store a PyObject* pointer on TensorImpl so that converting from a +// C++ Tensor to a Python Tensor is just a memory dereference). +// +// These unusual orderings must be treated with care. To start, you need to +// virtualize the destructor so that the PyObject can be decref'ed on +// destruction (because the C++ object itself doesn't know anything about +// Python--remember, layering!). This process itself is fraught, since +// acquiring the GIL could lead to deadlocks if someone is blocking on you +// while holding the GIL. Furthermore, if the C++ objects outlive the +// interpreter (which can happen if you stash them in a static global +// variable defined in libtorch), you may attempt to decref the object when +// the Python interpreter has already been shutdown. +// +// BUT WAIT, IT GETS WORSE. With torchdeploy, there may be multiple Python +// interpreters in a single process. If a C++ object is accessible from +// multiple interpreters, we must take care not to accidentally pass a +// PyObject from one interpreter with another interpreter. +// +// To prevent these mixups, we introduce a PyInterpreter "tag" (object with +// a vtable), which specifies a specific Python interpreter. +// +// - Any given object can be associated with AT MOST one Python interpreter. +// We represent the interpreter tag as a memory address to an instance of +// a virtual class that is allocated once per interpreter (this is so that +// we can request the interpreter to perform operations for us, if +// necessary). +// +// - It can be recorded with a PyObject (PyInterpreterObject) so that +// we know what interpreter the object is associated with, and we can +// raise an error if you try to use the PyObject from the wrong +// interpreter context. +// +// - It contains a vtable that can be used to perform various Python +// operations from ordinary C++ code that ordinarily wouldn't be accessible +// from libtorch. 
+// +// A simple use case is when a C++ object must be associated with a PyObject. +// However, for TensorImpl, we lazily allocate a PyObject the first time the +// object passes into Python. The invariants for this situation are more +// subtle: +// +// - A given TensorImpl's interpreter tag can only go from uninitialized to +// tagged; once tagged, this is a quiescent state (once tagged to an +// interpreter, ALWAYS tagged to that interpreter) +// +// - A thread may mutate the PyObject field of a TensorImpl if and only if it +// holds the GIL for the interpreter tagged on the TensorImpl. (If the +// TensorImpl is not tagged, it must first atomically claim its tag before it +// can validly write) +// +// WARNING: This class has to be written very carefully, because it may be +// possible for a Tensor to have a reference to an interpreter corresponding to +// a shared library that has ALREADY BEEN UNLOADED. This makes blindly calling +// virtual methods very dangerous, because the vtable may be garbage at that +// point (on a good day, you might get "pure virtual method called"). +// +// The idea to solve this problem is we always leak PyInterpreters (so they +// always stay live even after dlclose), and make sure we can disarm their +// virtual methods by indirecting through a separate PyInterpreterVTable +// object. This can be replaced with a no-op vtable from libc10.so, which +// is guaranteed to stick around until the bitter end. +// +// NB: The downside of representing PyInterpreter tags as full objects is that +// it takes an extra word on TensorImpl. If tags were instead just integer +// indices, on 64-bit architectures we could pack the tag and PyObject together +// into a single atomic word. On 32-bit architectures we could simply say that +// only one Python interpreter is supported (erroring if a nontrivial +// interpreter tag is attempted to be set). +// +// The difficulty with this scheme is we need to maintain an out-of-line table +// to get at the PyInterpreters so that we can do virtual method calls on them, +// and registration/deregistration to this table must be done in a thread-safe +// manner. This can be easily done if the number of possible PyInterpreters is +// small enough (e.g., 8-bit integer) by simply preallocating an array of +// sufficient size to hold all possible interpreters. Surely 128 interpreters +// is more than enough for anyone! +// +// I decided not to use this technique at the moment, because the extra word +// added by the PyInterpreter tag takes us to 24 words, which means that we +// still fit inside three eight-word cache lines. If you need to penny-pinch +// another word, consider doing this! + +struct C10_API PyInterpreterVTable { + virtual ~PyInterpreterVTable() = default; + + // Report the name of this interpreter + virtual std::string name() const = 0; + + // Run Py_INCREF on a PyObject. + virtual void incref(PyObject* pyobj) const = 0; + // Run Py_DECREF on a PyObject.
We DO NOT assume the GIL is held on call + // See NOTE [PyInterpreter::decref takes a `has_pyobj_slot` arg] + virtual void decref(PyObject* pyobj, bool has_pyobj_slot) const = 0; + + // Perform a detach by deferring to the __torch_dispatch__ implementation of + // detach, which will also arrange for the PyObject to get copied in this + // situation + virtual c10::intrusive_ptr detach( + const TensorImpl* self) const = 0; + + // Invoke the Python boxed fallback dispatch to go back into Python + virtual void dispatch(const c10::OperatorHandle& op, torch::jit::Stack* stack) + const = 0; + + virtual void reportErrorCallback(PyObject* callback, DispatchKey key) + const = 0; + + // This is only invoked in the multipy/torchdeploy // codespell:ignore multipy + // situation from pythonOpRegistrationTrampoline; this lets us get to the + // Python interpreter to actually find the appropriate Python op registration + // entry to call. + virtual void python_op_registration_trampoline( + const c10::OperatorHandle& op, + c10::DispatchKey, + c10::DispatchKeySet keyset, + torch::jit::Stack* stack, + bool with_keyset, + bool with_op) const = 0; + + virtual void throw_abstract_impl_not_imported_error( + std::string opname, + const char* pymodule, + const char* context) const = 0; + + // Invoke the Python dispatcher to handle this call + virtual void python_dispatcher( + const c10::OperatorHandle& op, + c10::DispatchKeySet, + torch::jit::Stack* stack) const = 0; + + virtual bool is_contiguous(const TensorImpl* self, at::MemoryFormat) + const = 0; + virtual bool is_strides_like(const TensorImpl* self, at::MemoryFormat) + const = 0; + virtual bool is_non_overlapping_and_dense(const TensorImpl* self) const = 0; + virtual c10::Device device(const TensorImpl* self) const = 0; + virtual int64_t dim(const TensorImpl* self) const = 0; + virtual c10::IntArrayRef strides(const TensorImpl* self) const = 0; + virtual c10::IntArrayRef sizes(const TensorImpl* self) const = 0; + virtual c10::SymIntArrayRef sym_sizes(const TensorImpl* self) const = 0; + virtual c10::Layout layout(const TensorImpl* self) const = 0; + virtual int64_t numel(const TensorImpl* self) const = 0; + virtual c10::SymInt sym_numel(const TensorImpl* self) const = 0; + virtual c10::SymIntArrayRef sym_strides(const TensorImpl* self) const = 0; + virtual c10::SymInt sym_storage_offset(const TensorImpl* self) const = 0; + + virtual void trace_gpu_event_creation( + c10::DeviceType device_type, + uintptr_t event) const = 0; + virtual void trace_gpu_event_deletion( + c10::DeviceType device_type, + uintptr_t event) const = 0; + virtual void trace_gpu_event_record( + c10::DeviceType device_type, + uintptr_t event, + uintptr_t stream) const = 0; + virtual void trace_gpu_event_wait( + c10::DeviceType device_type, + uintptr_t event, + uintptr_t stream) const = 0; + virtual void trace_gpu_memory_allocation( + c10::DeviceType device_type, + uintptr_t ptr) const = 0; + virtual void trace_gpu_memory_deallocation( + c10::DeviceType device_type, + uintptr_t ptr) const = 0; + virtual void trace_gpu_stream_creation( + c10::DeviceType device_type, + uintptr_t stream) const = 0; + virtual void trace_gpu_device_synchronization( + c10::DeviceType device_type) const = 0; + virtual void trace_gpu_stream_synchronization( + c10::DeviceType device_type, + uintptr_t stream) const = 0; + virtual void trace_gpu_event_synchronization( + c10::DeviceType device_type, + uintptr_t event) const = 0; + + virtual void reset_backward_hooks(const TensorImpl* self) const = 0; +}; + +struct 
C10_API PyInterpreter { + const PyInterpreterVTable* vtable_; + + PyInterpreter(const PyInterpreterVTable* vtable) : vtable_(vtable) {} + + const PyInterpreterVTable& operator*() const noexcept { + return *vtable_; + } + const PyInterpreterVTable* operator->() const noexcept { + return vtable_; + } + + // Disarm this PyInterpreter, making all of its methods noops. + // The vtable pointer is not an atomic at the moment, which means + // a disarm() invocation that is concurrent with active destructors + // is not thread safe and will trigger TSAN. My hope is that this + // situation doesn't ever actually happen; tensor destruction should + // quiesce when a dlclose happens, and any long-lived tensors whose + // destructors would be disarmed here only begin the destruction process + // on process shutdown (long after the dlclose has occurred). + void disarm() noexcept; +}; + +// PyInterpreterStatus describes what the state of its interpreter tag +// is, relative to the thread currently holding the GIL. +enum class PyInterpreterStatus { + // We just allocated the Tensor, it hasn't escaped to other threads, + // we know that it definitely hasn't been tagged to be associated + // with an interpreter. + DEFINITELY_UNINITIALIZED, + // We queried the interpreter field and it looked uninitialized. But + // another thread may have raced with us to tag it with some other + // interpreter id. So we will have to do a CEX (compare-exchange) to make + // sure we can actually nab it. + MAYBE_UNINITIALIZED, + // We queried the interpreter field and it was tagged to belong to us. + // This means we have sole write access (as we hold the GIL for this + // interpreter) + TAGGED_BY_US, + // Someone else tagged this. We can't use this TensorImpl from Python. + TAGGED_BY_OTHER, +}; + +} // namespace c10::impl diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/impl/PyObjectSlot.h b/phivenv/Lib/site-packages/torch/include/c10/core/impl/PyObjectSlot.h new file mode 100644 index 0000000000000000000000000000000000000000..f08d32bfcc97cafbfc8fc28738fd1b6008d149aa --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/impl/PyObjectSlot.h @@ -0,0 +1,190 @@ +#pragma once + +#include +#include +#include +#include + +#include + +namespace c10::impl { + +struct C10_API PyObjectSlot { + public: + PyObjectSlot(); + + ~PyObjectSlot(); + + void maybe_destroy_pyobj(); + + // Associate the TensorImpl with the specified PyObject, and, if necessary, + // also tag the interpreter. + // + // NB: This lives in a header so that we can inline away the switch on status + // + // NB: THIS FUNCTION CAN RAISE AN EXCEPTION. Make sure to clean up after + // PyObject if necessary! + void init_pyobj( + PyInterpreter* self_interpreter, + PyObject* pyobj, + PyInterpreterStatus status) { + impl::PyInterpreter* expected = nullptr; + switch (status) { + case impl::PyInterpreterStatus::DEFINITELY_UNINITIALIZED: + // caller guarantees there is no multithreaded access; if there is + // no data race OK to do a relaxed store + pyobj_interpreter_.store(self_interpreter, std::memory_order_relaxed); + break; + case impl::PyInterpreterStatus::TAGGED_BY_US: + // no tagging is necessary, the tag is already correct + break; + case impl::PyInterpreterStatus::MAYBE_UNINITIALIZED: + // attempt to claim this TensorImpl with the specified interpreter + // tag + if (pyobj_interpreter_.compare_exchange_strong( + expected, self_interpreter, std::memory_order_acq_rel)) { + break; + } + // test if, actually, it was already tagged by us!
this situation can't + // be caused by a race, but it could be caused by a situation + // where someone conservatively tagged the tensor as MAYBE_UNINITIALIZED + // (because they didn't pre-check the tag) when actually it was + // owned by the interpreter + if (expected == self_interpreter) { + break; + } + // fallthrough, we lost the race. We are guaranteed not to lose the + // race with ourself, as calls to init_pyobj with the same interpreter + // ID must be sequentialized by the GIL + [[fallthrough]]; + case impl::PyInterpreterStatus::TAGGED_BY_OTHER: + TORCH_CHECK( + false, + "cannot allocate PyObject for Tensor on interpreter ", + self_interpreter, + " that has already been used by another torch deploy interpreter ", + pyobj_interpreter_.load()); + } + + // we are the ONLY thread that can have gotten to this point. It is not + // possible to conflict with another zero interpreter as access is protected + // by GIL + // NB: owns_pyobj tag is initially false + pyobj_ = pyobj; + } + + // Query the PyObject interpreter. This may return null if there is no + // interpreter. This is racy! + PyInterpreter* pyobj_interpreter(); + + PyObject* _unchecked_untagged_pyobj() const; + + // Test the interpreter tag. If tagged for the current interpreter, return + // a non-nullopt (but possibly null) PyObject. If (possibly) untagged, + // returns a nullopt. If it is definitely invalid, raises an error. + // + // If `ignore_hermetic_tls` is false and this function is called from a + // hermetic context (ie, `HermeticPyObjectTLS::get_state()` is true), then + // nullopt is returned. If `ignore_hermetic_tls` is true, then the hermetic + // context is ignored, allowing you to check the interpreter tag of a + // nonhermetic PyObject from within a hermetic context. This is necessary + // because there are some cases where the deallocator function of a + // nonhermetic PyObject is called from within a hermetic context, so it must + // be properly treated as a nonhermetic PyObject. + // + // NB: this lives in header so that we can avoid actually creating the + // std::optional + std::optional check_pyobj( + PyInterpreter* self_interpreter, + bool ignore_hermetic_tls = false) const { + // Note [Memory ordering on Python interpreter tag] + impl::PyInterpreter* interpreter = + pyobj_interpreter_.load(std::memory_order_acquire); + if (interpreter == nullptr) { + // NB: This never returns DEFINITELY_UNINITIALIZED because there is + // always the possibility that another thread races to initialize + // after we query here. The only time when we can conclude a tensor + // is definitely uninitialized is when we have just allocated it and + // it cannot have escaped to other threads yet + return std::nullopt; + } else if (interpreter == self_interpreter) { + // NB: pyobj_ could still be null! + if (!ignore_hermetic_tls && c10::impl::HermeticPyObjectTLS::get_state()) { + return std::nullopt; + } else { + return _unchecked_untagged_pyobj(); + } + } else { + TORCH_CHECK( + false, + "cannot access PyObject for Tensor on interpreter ", + (*self_interpreter)->name(), + " that has already been used by another torch deploy interpreter ", + (*pyobj_interpreter_.load())->name()); + } + } + + // Clear the PyObject field for an interpreter, in situations where we + // statically know the tensor is tagged with our interpreter. 
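For intuition, the MAYBE_UNINITIALIZED branch of init_pyobj above is a first-writer-wins claim on an atomic pointer. A standalone sketch of that protocol, with Interp, tag, and claim as invented stand-ins for the real types:

#include <atomic>
#include <cassert>

// Invented stand-in for a Python interpreter identity.
struct Interp {
  const char* name;
};

// The tag starts out null; the first interpreter to claim it wins.
std::atomic<Interp*> tag{nullptr};

// Returns true if `self` owns the tag afterwards, mirroring the
// MAYBE_UNINITIALIZED path: try the compare-exchange, and if it fails,
// check whether we merely lost the race to ourselves.
bool claim(Interp* self) {
  Interp* expected = nullptr;
  if (tag.compare_exchange_strong(expected, self, std::memory_order_acq_rel)) {
    return true;  // we won the race and tagged it
  }
  return expected == self;  // lost the race, but possibly to ourselves
}

int main() {
  Interp a{"a"}, b{"b"};
  assert(claim(&a));   // first claim succeeds
  assert(claim(&a));   // re-claiming by the same interpreter is fine
  assert(!claim(&b));  // a different interpreter cannot steal the tag
}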
+ void unchecked_clear_pyobj(PyInterpreter* interpreter); + + PyInterpreter& load_pyobj_interpreter() const; + + // Check if the PyObjectSlot's interpreter is the same as the specified + // interpreter + bool check_interpreter(PyInterpreter* interpreter); + + // Check if the PyObjectSlot is holding a PyObject, owned or non-owned + bool has_pyobj_nonhermetic(); + + bool owns_pyobj(); + + void set_owns_pyobj(bool b); + + private: + // This field contains the interpreter tag for this object. See + // Note [Python interpreter tag] for general context + // + // Note [Memory ordering on Python interpreter tag] + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // What memory_order do we need when accessing this atomic? We don't + // need a single total modification order (as provided by + // memory_order_seq_cst) as pyobj_interpreter_ is monotonic: it can only + // transition from -1 to some positive integer and never changes afterwards. + // Because there is only one modification, it trivially already has a total + // modification order (e.g., we don't need fences or locked instructions on + // x86) + // + // In fact, one could make a reasonable argument that relaxed reads are OK, + // due to the presence of external locking (GIL) to ensure that interactions + // with other data structures are still correctly synchronized, so that + // we fall in the "Single-Location Data Structures" case as described in + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2055r0.pdf + // However, on x86, it doesn't matter if I use acquire or relaxed on the load + // as I get the same assembly in both cases. So I just use the more + // conservative acquire (which will impede compiler optimizations but I don't + // care) + std::atomic pyobj_interpreter_; + + // This field contains a reference to a PyObject representing this Tensor. + // If pyobj is nullptr, when we transfer Tensor to Python, we allocate a new + // PyObject for it and set this field. This field does not have to be + // protected by an atomic as it is only allowed to be accessed when you hold + // the GIL, or during destruction of the tensor. + // + // When a PyObject dies, you are obligated to clear this field + // (otherwise, you will try to use-after-free the pyobj); this currently + // occurs in THPVariable_clear in torch/csrc/autograd/python_variable.cpp + // + // NB: Ordinarily, this should not be a strong reference, as if the + // PyObject owns the Tensor, this would create a reference cycle. + // However, sometimes this ownership flips. To track who owns + // who, this has a single pointer tag indicating whether or not the + // C++ object owns the PyObject (the common case, zero, means PyObject + // owns the C++ object); see _unchecked_untagged_pyobj for raw access + // or check_pyobj for checked access. 
See references to PyObject + // resurrection in torch/csrc/autograd/python_variable.cpp + PyObject* pyobj_; +}; + +} // namespace c10::impl diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/impl/PythonDispatcherTLS.h b/phivenv/Lib/site-packages/torch/include/c10/core/impl/PythonDispatcherTLS.h new file mode 100644 index 0000000000000000000000000000000000000000..de721fc6a1b785dcf36065a8b188302aed8b6f21 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/impl/PythonDispatcherTLS.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + +namespace c10::impl { + +struct C10_API PythonDispatcherTLS { + static void set_state(PyInterpreter* state); + static PyInterpreter* get_state(); + static void reset_state(); +}; + +struct C10_API DisablePythonDispatcher { + DisablePythonDispatcher() : old_(PythonDispatcherTLS::get_state()) { + PythonDispatcherTLS::set_state({}); + } + + DisablePythonDispatcher(DisablePythonDispatcher&& other) = delete; + DisablePythonDispatcher(const DisablePythonDispatcher&) = delete; + DisablePythonDispatcher& operator=(const DisablePythonDispatcher&) = delete; + DisablePythonDispatcher& operator=(DisablePythonDispatcher&&) = delete; + ~DisablePythonDispatcher() { + PythonDispatcherTLS::set_state(old_); + } + PyInterpreter* old_; +}; + +} // namespace c10::impl diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/impl/SizesAndStrides.h b/phivenv/Lib/site-packages/torch/include/c10/core/impl/SizesAndStrides.h new file mode 100644 index 0000000000000000000000000000000000000000..4bba0d2870604e7bad99dfd5a332eea2f6348831 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/impl/SizesAndStrides.h @@ -0,0 +1,327 @@ +#pragma once + +#include +#include + +#include +#include +#include + +#define C10_SIZES_AND_STRIDES_MAX_INLINE_SIZE 5 + +namespace c10::impl { + +// Packed container for TensorImpl sizes and strides. +// This design improves on the previous approach of using a pair of +// c10::SmallVector by specializing for the operations we +// actually use and enforcing that the number of sizes is the same as +// the number of strides. The memory layout is as follows: +// +// 1 size_t for the size +// 5 eightbytes of inline sizes and 5 eightbytes of inline strides, OR pointer +// to out-of-line array +class C10_API SizesAndStrides { + public: + // TODO: different iterator types for sizes & strides to prevent + // mixing the two accidentally. + using sizes_iterator = int64_t*; + using sizes_const_iterator = const int64_t*; + using strides_iterator = int64_t*; + using strides_const_iterator = const int64_t*; + + SizesAndStrides() { + size_at_unchecked(0) = 0; + stride_at_unchecked(0) = 1; + } + + ~SizesAndStrides() { + if (C10_UNLIKELY(!isInline())) { + // NOLINTNEXTLINE(cppcoreguidelines-no-malloc) + free(outOfLineStorage_); + } + } + + SizesAndStrides(const SizesAndStrides& rhs) : size_(rhs.size_) { + if (C10_LIKELY(rhs.isInline())) { + copyDataInline(rhs); + } else { + allocateOutOfLineStorage(size_); + copyDataOutline(rhs); + } + } + + bool operator==(const SizesAndStrides& other) const { + if (size_ != other.size_) { + return false; + } + return !( + isInline() + ? 
std::memcmp( + inlineStorage_, other.inlineStorage_, sizeof(inlineStorage_)) + : std::memcmp( + outOfLineStorage_, + other.outOfLineStorage_, + storageBytes(size_))); + } + + SizesAndStrides& operator=(const SizesAndStrides& rhs) { + if (this == &rhs) { + return *this; + } + if (C10_LIKELY(rhs.isInline())) { + if (C10_UNLIKELY(!isInline())) { + // NOLINTNEXTLINE(cppcoreguidelines-no-malloc) + free(outOfLineStorage_); + } + copyDataInline(rhs); + } else { + if (isInline()) { + allocateOutOfLineStorage(rhs.size_); + } else { + resizeOutOfLineStorage(rhs.size_); + } + copyDataOutline(rhs); + } + size_ = rhs.size_; + return *this; + } + + // Move from rhs. rhs.size() == 0 afterwards. + SizesAndStrides(SizesAndStrides&& rhs) noexcept : size_(rhs.size_) { + if (C10_LIKELY(isInline())) { + memcpy(inlineStorage_, rhs.inlineStorage_, sizeof(inlineStorage_)); + } else { + outOfLineStorage_ = rhs.outOfLineStorage_; + rhs.outOfLineStorage_ = nullptr; + } + + rhs.size_ = 0; + } + + // Move from rhs. rhs.size() == 0 afterwards. + SizesAndStrides& operator=(SizesAndStrides&& rhs) noexcept { + if (this == &rhs) { + return *this; + } + if (C10_LIKELY(rhs.isInline())) { + if (C10_UNLIKELY(!isInline())) { + // NOLINTNEXTLINE(cppcoreguidelines-no-malloc) + free(outOfLineStorage_); + } + copyDataInline(rhs); + } else { + // They're outline. We're going to steal their vector. + if (!isInline()) { + // NOLINTNEXTLINE(cppcoreguidelines-no-malloc) + free(outOfLineStorage_); + } + outOfLineStorage_ = rhs.outOfLineStorage_; + rhs.outOfLineStorage_ = nullptr; + } + size_ = rhs.size_; + rhs.size_ = 0; + + return *this; + } + + size_t size() const noexcept { + return size_; + } + + const int64_t* sizes_data() const noexcept { + if (C10_LIKELY(isInline())) { + return &inlineStorage_[0]; + } else { + return &outOfLineStorage_[0]; + } + } + + int64_t* sizes_data() noexcept { + if (C10_LIKELY(isInline())) { + return &inlineStorage_[0]; + } else { + return &outOfLineStorage_[0]; + } + } + + sizes_const_iterator sizes_begin() const noexcept { + return sizes_data(); + } + + sizes_iterator sizes_begin() noexcept { + return sizes_data(); + } + + sizes_const_iterator sizes_end() const noexcept { + return sizes_begin() + size(); + } + + sizes_iterator sizes_end() noexcept { + return sizes_begin() + size(); + } + + IntArrayRef sizes_arrayref() const noexcept { + return IntArrayRef{sizes_data(), size()}; + } + + void set_sizes(IntArrayRef newSizes) { + resize(newSizes.size()); + std::copy(newSizes.begin(), newSizes.end(), sizes_begin()); + } + + void set_strides(IntArrayRef strides) { + TORCH_INTERNAL_ASSERT(strides.size() == size()); + std::copy(strides.begin(), strides.end(), strides_begin()); + } + + const int64_t* strides_data() const noexcept { + if (C10_LIKELY(isInline())) { + return &inlineStorage_[C10_SIZES_AND_STRIDES_MAX_INLINE_SIZE]; + } else { + return &outOfLineStorage_[size()]; + } + } + + int64_t* strides_data() noexcept { + if (C10_LIKELY(isInline())) { + return &inlineStorage_[C10_SIZES_AND_STRIDES_MAX_INLINE_SIZE]; + } else { + return &outOfLineStorage_[size()]; + } + } + + strides_const_iterator strides_begin() const noexcept { + if (C10_LIKELY(isInline())) { + return &inlineStorage_[C10_SIZES_AND_STRIDES_MAX_INLINE_SIZE]; + } else { + return &outOfLineStorage_[size()]; + } + } + + strides_iterator strides_begin() noexcept { + if (C10_LIKELY(isInline())) { + return &inlineStorage_[C10_SIZES_AND_STRIDES_MAX_INLINE_SIZE]; + } else { + return &outOfLineStorage_[size()]; + } + } + + strides_const_iterator 
strides_end() const noexcept { + return strides_begin() + size(); + } + + strides_iterator strides_end() noexcept { + return strides_begin() + size(); + } + + IntArrayRef strides_arrayref() const noexcept { + return IntArrayRef{strides_data(), size()}; + } + + // Size accessors. + int64_t size_at(size_t idx) const noexcept { + assert(idx < size()); + return sizes_data()[idx]; + } + + int64_t& size_at(size_t idx) noexcept { + assert(idx < size()); + return sizes_data()[idx]; + } + + int64_t size_at_unchecked(size_t idx) const noexcept { + return sizes_data()[idx]; + } + + int64_t& size_at_unchecked(size_t idx) noexcept { + return sizes_data()[idx]; + } + + // Stride accessors. + int64_t stride_at(size_t idx) const noexcept { + assert(idx < size()); + return strides_data()[idx]; + } + + int64_t& stride_at(size_t idx) noexcept { + assert(idx < size()); + return strides_data()[idx]; + } + + int64_t stride_at_unchecked(size_t idx) const noexcept { + return strides_data()[idx]; + } + + int64_t& stride_at_unchecked(size_t idx) noexcept { + return strides_data()[idx]; + } + + void resize(size_t newSize) { + const auto oldSize = size(); + if (newSize == oldSize) { + return; + } + if (C10_LIKELY( + newSize <= C10_SIZES_AND_STRIDES_MAX_INLINE_SIZE && isInline())) { + if (oldSize < newSize) { + const auto bytesToZero = + (newSize - oldSize) * sizeof(inlineStorage_[0]); + memset(&inlineStorage_[oldSize], 0, bytesToZero); + memset( + &inlineStorage_[C10_SIZES_AND_STRIDES_MAX_INLINE_SIZE + oldSize], + 0, + bytesToZero); + } + size_ = newSize; + } else { + resizeSlowPath(newSize, oldSize); + } + } + + void resizeSlowPath(size_t newSize, size_t oldSize); + + private: + bool isInline() const noexcept { + return size_ <= C10_SIZES_AND_STRIDES_MAX_INLINE_SIZE; + } + + void copyDataInline(const SizesAndStrides& rhs) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(rhs.isInline()); + memcpy(inlineStorage_, rhs.inlineStorage_, sizeof(inlineStorage_)); + } + + static size_t storageBytes(size_t size) noexcept { + return size * 2 * sizeof(int64_t); + } + + void allocateOutOfLineStorage(size_t size) { + // NOLINTNEXTLINE(cppcoreguidelines-no-malloc) + outOfLineStorage_ = static_cast(malloc(storageBytes(size))); + TORCH_CHECK( + outOfLineStorage_, + "Could not allocate memory for Tensor SizesAndStrides!"); + } + + void resizeOutOfLineStorage(size_t newSize) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!isInline()); + outOfLineStorage_ = static_cast( + // NOLINTNEXTLINE(cppcoreguidelines-no-malloc) + realloc(outOfLineStorage_, storageBytes(newSize))); + TORCH_CHECK( + outOfLineStorage_, + "Could not allocate memory for Tensor SizesAndStrides!"); + } + + void copyDataOutline(const SizesAndStrides& rhs) noexcept { + memcpy(outOfLineStorage_, rhs.outOfLineStorage_, storageBytes(rhs.size_)); + } + + size_t size_{1}; + union { + int64_t* outOfLineStorage_; + // NOLINTNEXTLINE(*c-array*) + int64_t inlineStorage_[C10_SIZES_AND_STRIDES_MAX_INLINE_SIZE * 2]{}; + }; +}; + +} // namespace c10::impl diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/impl/TorchDispatchModeTLS.h b/phivenv/Lib/site-packages/torch/include/c10/core/impl/TorchDispatchModeTLS.h new file mode 100644 index 0000000000000000000000000000000000000000..48d09f4d8d6fb9f66187c3245633578993aef22d --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/impl/TorchDispatchModeTLS.h @@ -0,0 +1,67 @@ +#pragma once + +#include +#include + +namespace c10::impl { + +enum class TorchDispatchModeKey : int8_t { + FAKE, + PROXY, + FUNCTIONAL, + NUM_MODE_KEYS +}; + +using
PyObject_TorchDispatchMode = SafePyObjectT; + +struct C10_API TorchDispatchModeTLS { + // This API is NOT invariant safe. + // It must not take in an infra mode that uses TorchDispatchModeKey + // If you're pushing an infra mode onto the stack, we expect + // you to use set_mode + static void push_non_infra_mode_onto_stack( + std::shared_ptr mode); + // Pops the top mode of the stack, + // giving precedence to user modes before attempting to pop + // any infra modes + static const std::shared_ptr pop_stack(); + // Returns the highest-priority infra mode on the stack, + // along with its mode key. + static const std:: + tuple, TorchDispatchModeKey> + pop_highest_infra_mode(); + + static const std::shared_ptr& get_stack_at( + int64_t idx); + static int64_t stack_len(); + + static const std::optional> + get_mode(TorchDispatchModeKey mode_key); + static const std::optional> + unset_mode(TorchDispatchModeKey mode_key); + static void set_mode( + const std::shared_ptr& mode, + TorchDispatchModeKey mode_key); + + static const TorchDispatchModeTLS& get_state(); + static void set_state(TorchDispatchModeTLS state); + + static bool any_modes_set(bool skip_infra_modes = false); + + private: + std::vector> stack_; + // Users are allowed to push multiple ProxyTorchDispatchMode objects onto the + // stack + // However, we only allow a single FakeTensorMode onto the stack at a time + // (Pushing additional FakeTensorModes onto the stack is a no-op) + std::array< + std::optional>, + static_cast(TorchDispatchModeKey::NUM_MODE_KEYS)> + infra_modes_; +}; + +C10_API bool dispatch_mode_enabled(); + +C10_API std::string to_string(TorchDispatchModeKey mode_key); + +} // namespace c10::impl diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/impl/VirtualGuardImpl.h b/phivenv/Lib/site-packages/torch/include/c10/core/impl/VirtualGuardImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..77175d235fb899613661d3bf80e0c6cc6c66fc04 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/impl/VirtualGuardImpl.h @@ -0,0 +1,108 @@ +#pragma once + +#include + +namespace c10::impl { + +/** + * An implementation of DeviceGuardImplInterface which delegates + * to virtual dispatch on the DeviceGuardImpl registry. + */ +class VirtualGuardImpl final : public DeviceGuardImplInterface { + public: + VirtualGuardImpl(DeviceType device_type) + : impl_(getDeviceGuardImpl(device_type)) {} + // This constructor exists purely for testing + VirtualGuardImpl(const DeviceGuardImplInterface* impl) : impl_(impl) {} + + // Copying and moving is OK! 
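VirtualGuardImpl below is a copyable value type whose every method forwards to an interface pointer fetched from a registry, which is why copying and moving it is harmless. A stripped-down sketch of that delegation pattern with a stubbed registry (DeviceIface, CpuImpl, getImpl, and VirtualIface are all hypothetical, not the c10 API):

#include <cassert>

struct DeviceIface {
  virtual ~DeviceIface() = default;
  virtual int type() const = 0;
};

struct CpuImpl final : DeviceIface {
  int type() const override { return 0; }
};

// The real registry maps DeviceType to a per-backend singleton;
// here it is stubbed with a single static instance.
const DeviceIface* getImpl(int /*device_type*/) {
  static CpuImpl cpu;
  return &cpu;
}

// Copyable value facade that forwards to the registered implementation.
class VirtualIface final : public DeviceIface {
 public:
  explicit VirtualIface(int device_type) : impl_(getImpl(device_type)) {}
  int type() const override { return impl_->type(); }

 private:
  const DeviceIface* impl_;
};

int main() {
  VirtualIface v(0);
  VirtualIface copy = v;  // copying is fine: the state is just a pointer
  assert(copy.type() == 0);
}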
+ VirtualGuardImpl(const VirtualGuardImpl&) = default; + VirtualGuardImpl& operator=(const VirtualGuardImpl&) = default; + VirtualGuardImpl(VirtualGuardImpl&&) noexcept = default; + VirtualGuardImpl& operator=(VirtualGuardImpl&&) noexcept = default; + ~VirtualGuardImpl() override = default; + + DeviceType type() const override { + return impl_->type(); + } + Device exchangeDevice(Device d) const override { + return impl_->exchangeDevice(d); + } + Device getDevice() const override { + return impl_->getDevice(); + } + void setDevice(Device d) const override { + impl_->setDevice(d); + } + void uncheckedSetDevice(Device d) const noexcept override { + impl_->uncheckedSetDevice(d); + } + Stream getStream(Device d) const override { + return impl_->getStream(d); + } + Stream getNewStream(Device d, int priority = 0) const override { + return impl_->getNewStream(d, priority); + } + Stream getDefaultStream(Device d) const override { + return impl_->getDefaultStream(d); + } + Stream getStreamFromGlobalPool(Device d, bool isHighPriority = false) + const override { + return impl_->getStreamFromGlobalPool(d, isHighPriority); + } + Stream exchangeStream(Stream s) const override { + return impl_->exchangeStream(s); + } + DeviceIndex deviceCount() const noexcept override { + return impl_->deviceCount(); + } + + // Event functions + void record( + void** event, + const Stream& stream, + const DeviceIndex device_index, + const EventFlag flag) const override { + impl_->record(event, stream, device_index, flag); + } + void block(void* event, const Stream& stream) const override { + impl_->block(event, stream); + } + bool queryEvent(void* event) const override { + return impl_->queryEvent(event); + } + void destroyEvent(void* event, const DeviceIndex device_index) + const noexcept override { + impl_->destroyEvent(event, device_index); + } + + bool queryStream(const Stream& stream) const override { + return impl_->queryStream(stream); + } + void synchronizeStream(const Stream& stream) const override { + impl_->synchronizeStream(stream); + } + + void recordDataPtrOnStream(const c10::DataPtr& data_ptr, const Stream& stream) + const override { + impl_->recordDataPtrOnStream(data_ptr, stream); + } + + double elapsedTime(void* event1, void* event2, const DeviceIndex device_index) + const override { + return impl_->elapsedTime(event1, event2, device_index); + } + + void synchronizeEvent(void* event) const override { + return impl_->synchronizeEvent(event); + } + + void synchronizeDevice(const DeviceIndex device_index) const override { + return impl_->synchronizeDevice(device_index); + } + + private: + const DeviceGuardImplInterface* impl_ = nullptr; +}; + +} // namespace c10::impl diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/impl/alloc_cpu.h b/phivenv/Lib/site-packages/torch/include/c10/core/impl/alloc_cpu.h new file mode 100644 index 0000000000000000000000000000000000000000..7e8f4ae4583a7c28b6cd9b4b1dda0adc80a19d8e --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/impl/alloc_cpu.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include + +#include + +namespace c10 { + +C10_API void* alloc_cpu(size_t nbytes); +C10_API void free_cpu(void* data); + +#if defined(__linux__) && !defined(__ANDROID__) +C10_API size_t c10_compute_alignment(size_t nbytes); +#endif + +#ifdef USE_MIMALLOC_ON_MKL +namespace mi_malloc_wrapper { +C10_API void* c10_mi_malloc(size_t size); +C10_API void* c10_mi_calloc(size_t count, size_t size); +C10_API void* c10_mi_realloc(void* p, size_t newsize); +C10_API void* 
c10_mi_malloc_aligned(size_t size, size_t alignment); +C10_API void c10_mi_free(void* p); +} // namespace mi_malloc_wrapper +#endif + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/core/thread_pool.h b/phivenv/Lib/site-packages/torch/include/c10/core/thread_pool.h new file mode 100644 index 0000000000000000000000000000000000000000..0cc15b325dfb9593205253efba320ec428294ea0 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/core/thread_pool.h @@ -0,0 +1,120 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace c10 { + +class C10_API TaskThreadPoolBase { + public: + virtual void run(std::function func) = 0; + + virtual size_t size() const = 0; + + /** + * The number of available (i.e. idle) threads in this thread pool. + */ + virtual size_t numAvailable() const = 0; + + /** + * Check if the current thread is from the thread pool. + */ + virtual bool inThreadPool() const = 0; + + virtual ~TaskThreadPoolBase() noexcept = default; + + static size_t defaultNumThreads(); +}; + +class C10_API ThreadPool : public c10::TaskThreadPoolBase { + protected: + struct task_element_t { + bool run_with_id; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::function no_id; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::function with_id; + + explicit task_element_t(std::function f) + : run_with_id(false), no_id(std::move(f)), with_id(nullptr) {} + explicit task_element_t(std::function f) + : run_with_id(true), no_id(nullptr), with_id(std::move(f)) {} + }; + + std::queue tasks_; + std::vector threads_; + mutable std::mutex mutex_; + std::condition_variable condition_; + std::condition_variable completed_; + std::atomic_bool running_; + bool complete_; + std::size_t available_; + std::size_t total_; + int numa_node_id_; + + public: + ThreadPool() = delete; + + explicit ThreadPool( + int pool_size, + int numa_node_id = -1, + const std::function& init_thread = nullptr); + + ~ThreadPool() override; + + size_t size() const override; + + size_t numAvailable() const override; + + bool inThreadPool() const override; + + void run(std::function func) override; + + template + void runTaskWithID(Task task) { + std::unique_lock lock(mutex_); + + // Set task and signal condition variable so that a worker thread will + // wake up and use the task. + tasks_.emplace(static_cast>(task)); + complete_ = false; + condition_.notify_one(); + } + + /// @brief Wait for queue to be empty + void waitWorkComplete(); + + private: + // @brief Entry point for pool threads. 
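The main_loop declared below is the classic worker-loop pattern: each thread sleeps on a condition variable and drains a mutex-protected task queue. A self-contained sketch of that pattern (TinyPool is an invented toy, not this class):

#include <condition_variable>
#include <cstdio>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>

class TinyPool {
 public:
  explicit TinyPool(std::size_t n) {
    for (std::size_t i = 0; i < n; ++i) {
      threads_.emplace_back([this] { mainLoop(); });
    }
  }
  ~TinyPool() {
    {
      std::lock_guard<std::mutex> lock(mutex_);
      running_ = false;
    }
    cv_.notify_all();
    for (auto& t : threads_) t.join();
  }
  void run(std::function<void()> f) {
    {
      std::lock_guard<std::mutex> lock(mutex_);
      tasks_.push(std::move(f));
    }
    cv_.notify_one();  // wake one sleeping worker to take the task
  }

 private:
  void mainLoop() {
    std::unique_lock<std::mutex> lock(mutex_);
    for (;;) {
      cv_.wait(lock, [this] { return !tasks_.empty() || !running_; });
      if (!running_ && tasks_.empty()) return;  // drain before exiting
      auto task = std::move(tasks_.front());
      tasks_.pop();
      lock.unlock();  // run the task without holding the pool lock
      task();
      lock.lock();
    }
  }
  std::queue<std::function<void()>> tasks_;
  std::vector<std::thread> threads_;
  std::mutex mutex_;
  std::condition_variable cv_;
  bool running_ = true;
};

int main() {
  TinyPool pool(2);
  for (int i = 0; i < 4; ++i) {
    pool.run([i] { std::printf("task %d\n", i); });
  }
}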
+ void main_loop(std::size_t index); +}; + +class C10_API TaskThreadPool : public c10::ThreadPool { + public: + explicit TaskThreadPool(int pool_size, int numa_node_id = -1) + : ThreadPool(pool_size, numa_node_id, [numa_node_id]() { + setThreadName("CaffeTaskThread"); + NUMABind(numa_node_id); + }) {} +}; + +C10_DECLARE_SHARED_REGISTRY( + ThreadPoolRegistry, + TaskThreadPoolBase, + int, + int, + bool); + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAAlgorithm.h b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAAlgorithm.h new file mode 100644 index 0000000000000000000000000000000000000000..f771421d4c8c4e4270ee6ff27ef736bbbcec318e --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAAlgorithm.h @@ -0,0 +1,31 @@ +#ifdef THRUST_DEVICE_LOWER_BOUND_WORKS +#include +#include +#include +#include +#endif +namespace c10::cuda { +#ifdef THRUST_DEVICE_LOWER_BOUND_WORKS +template +__forceinline__ __device__ Iter +lower_bound(Iter start, Iter end, Scalar value) { + return thrust::lower_bound(thrust::device, start, end, value); +} +#else +// thrust::lower_bound is broken on device, see +// https://github.com/NVIDIA/thrust/issues/1734 Implementation inspired by +// https://github.com/pytorch/pytorch/blob/805120ab572efef66425c9f595d9c6c464383336/aten/src/ATen/native/cuda/Bucketization.cu#L28 +template +__device__ Iter lower_bound(Iter start, Iter end, Scalar value) { + while (start < end) { + auto mid = start + ((end - start) >> 1); + if (*mid < value) { + start = mid + 1; + } else { + end = mid; + } + } + return end; +} +#endif // THRUST_DEVICE_LOWER_BOUND_WORKS +} // namespace c10::cuda diff --git a/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAAllocatorConfig.h b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAAllocatorConfig.h new file mode 100644 index 0000000000000000000000000000000000000000..72393032c131ae37b25e7080ee34a77686aa73b8 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAAllocatorConfig.h @@ -0,0 +1,164 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace c10::cuda::CUDACachingAllocator { + +enum class Expandable_Segments_Handle_Type : int { + UNSPECIFIED = 0, + POSIX_FD = 1, + FABRIC_HANDLE = 2, +}; + +// Environment config parser +class C10_CUDA_API CUDAAllocatorConfig { + public: + static size_t max_split_size() { + return instance().m_max_split_size; + } + static double garbage_collection_threshold() { + return instance().m_garbage_collection_threshold; + } + + static bool expandable_segments() { +#ifndef PYTORCH_C10_DRIVER_API_SUPPORTED + if (instance().m_expandable_segments) { + TORCH_WARN_ONCE("expandable_segments not supported on this platform") + } + return false; +#else + return instance().m_expandable_segments; +#endif + } + + static Expandable_Segments_Handle_Type expandable_segments_handle_type() { + return instance().m_expandable_segments_handle_type; + } + + static void set_expandable_segments_handle_type( + Expandable_Segments_Handle_Type handle_type) { + instance().m_expandable_segments_handle_type = handle_type; + } + + static bool release_lock_on_cudamalloc() { + return instance().m_release_lock_on_cudamalloc; + } + + /** Pinned memory allocator settings */ + static bool pinned_use_cuda_host_register() { + return instance().m_pinned_use_cuda_host_register; + } + + static size_t pinned_num_register_threads() { + return instance().m_pinned_num_register_threads; + } + + static bool 
pinned_use_background_threads() { + return instance().m_pinned_use_background_threads; + } + + static size_t pinned_max_register_threads() { + // Based on the benchmark results, we see better allocation performance + // with 8 threads. However, on future systems we may need more threads, + // so we limit this to 128. + return 128; + } + + // This is used to round up allocation sizes to the nearest power-of-2 + // division. More description below in function roundup_power2_next_division. + // As an example, if we want 4 divisions between powers of 2, this can be done + // using env variable: PYTORCH_CUDA_ALLOC_CONF=roundup_power2_divisions:4 + static size_t roundup_power2_divisions(size_t size); + + static std::vector roundup_power2_divisions() { + return instance().m_roundup_power2_divisions; + } + + static size_t max_non_split_rounding_size() { + return instance().m_max_non_split_rounding_size; + } + + static std::string last_allocator_settings() { + std::lock_guard lock( + instance().m_last_allocator_settings_mutex); + return instance().m_last_allocator_settings; + } + + static CUDAAllocatorConfig& instance() { + static CUDAAllocatorConfig* s_instance = ([]() { + auto inst = new CUDAAllocatorConfig(); + auto env = c10::utils::get_env("PYTORCH_CUDA_ALLOC_CONF"); +#ifdef USE_ROCM + // convenience for ROCm users, allow alternative HIP token + if (!env.has_value()) { + env = c10::utils::get_env("PYTORCH_HIP_ALLOC_CONF"); + } +#endif + inst->parseArgs(env); + return inst; + })(); + return *s_instance; + } + + void parseArgs(const std::optional& env); + + private: + CUDAAllocatorConfig(); + + static void lexArgs(const std::string& env, std::vector& config); + static void consumeToken( + const std::vector& config, + size_t i, + const char c); + size_t parseMaxSplitSize(const std::vector& config, size_t i); + size_t parseMaxNonSplitRoundingSize( + const std::vector& config, + size_t i); + size_t parseGarbageCollectionThreshold( + const std::vector& config, + size_t i); + size_t parseRoundUpPower2Divisions( + const std::vector& config, + size_t i); + size_t parseAllocatorConfig( + const std::vector& config, + size_t i, + bool& used_cudaMallocAsync); + size_t parsePinnedUseCudaHostRegister( + const std::vector& config, + size_t i); + size_t parsePinnedNumRegisterThreads( + const std::vector& config, + size_t i); + size_t parsePinnedUseBackgroundThreads( + const std::vector& config, + size_t i); + + std::atomic m_max_split_size; + std::atomic m_max_non_split_rounding_size; + std::vector m_roundup_power2_divisions; + std::atomic m_garbage_collection_threshold; + std::atomic m_pinned_num_register_threads; + std::atomic m_expandable_segments; + std::atomic + m_expandable_segments_handle_type; + std::atomic m_release_lock_on_cudamalloc; + std::atomic m_pinned_use_cuda_host_register; + std::atomic m_pinned_use_background_threads; + std::string m_last_allocator_settings; + std::mutex m_last_allocator_settings_mutex; +}; + +// General caching allocator utilities +C10_CUDA_API void setAllocatorSettings(const std::string& env); + +} // namespace c10::cuda::CUDACachingAllocator diff --git a/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDACachingAllocator.h b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDACachingAllocator.h new file mode 100644 index 0000000000000000000000000000000000000000..522b6e4e32e67bfa2d79196579c4b6a6ca1509d9 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDACachingAllocator.h @@ -0,0 +1,563 @@ +#pragma once + +#include +#include +#include +#include
+#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace c10 { + +// The caching allocator will execute every registered callback if it is +// unable to find a block inside the already allocated area. +class C10_CUDA_API FreeMemoryCallback { + public: + virtual ~FreeMemoryCallback() = default; + virtual bool Execute() = 0; +}; + +C10_DECLARE_REGISTRY(FreeCudaMemoryCallbacksRegistry, FreeMemoryCallback); +#define REGISTER_FREE_MEMORY_CALLBACK(name, ...) \ + C10_REGISTER_CLASS(FreeCudaMemoryCallbacksRegistry, name, __VA_ARGS__) +} // namespace c10 + // +// TODO: Turn this into an honest to goodness class. I briefly attempted to do +// this, but it was a bit irritating to figure out how to also correctly +// apply pimpl pattern so I didn't have to leak any internal implementation +// details in the header (CUDACachingAllocator could be made a pimpl, but +// you also need to appropriately define a class which is a subclass +// of Allocator. Not impossible, but required a bit more surgery than +// I wanted to do at the time.) +// +// Why is this using a namespace rather than old-style THCCachingAllocator_ +// prefix? Mostly because it made the HIPify rules easier to write; _ is +// not counted as a word boundary, so you would otherwise have to list each +// of these functions. + +namespace c10::cuda::CUDACachingAllocator { + +// Preserved only for BC reasons +// NOLINTNEXTLINE(misc-unused-using-decls) +using c10::CachingDeviceAllocator::DeviceStats; + +extern const size_t kLargeBuffer; + +typedef std::shared_ptr (*CreateContextFn)(); + +// Struct containing info of an allocation block (i.e. a fractional part of a +// cudaMalloc). +struct BlockInfo { + size_t size = 0; + size_t requested_size = 0; + int32_t gc_counter = 0; + bool allocated = false; + bool active = false; + std::shared_ptr + context_when_allocated; // per-watcher context +}; + +// Struct containing info of a memory segment (i.e. one contiguous cudaMalloc). +struct SegmentInfo { + c10::DeviceIndex device = 0; + size_t address = 0; + size_t total_size = 0; + size_t requested_size = 0; // unrounded, actually requested size + size_t allocated_size = 0; + size_t active_size = 0; + cudaStream_t stream = nullptr; + bool is_large = false; + bool is_expandable = false; + MempoolId_t owner_private_pool_id = {0, 0}; + std::vector blocks; + std::shared_ptr context_when_allocated; +}; + +struct AllocatorState { + virtual ~AllocatorState() = default; +}; + +union trace_time_ { + time_t t_; + approx_time_t approx_t_; +}; + +struct TraceEntry { + enum Action { + ALLOC, // API call made to the caching allocator for new memory + FREE_REQUESTED, // API call made to the caching allocator to free memory + FREE_COMPLETED, // The allocator might have to delay a free because + // it is still in use on another stream via record_stream + // This event is generated when a free actually completes. + SEGMENT_ALLOC, // a call to cudaMalloc to get more memory from the OS + SEGMENT_FREE, // a call to cudaFree to return memory to the OS (e.g.
to + // defragment or empty_caches) + SEGMENT_MAP, // a call to cuMemMap (used with expandable_segments) + SEGMENT_UNMAP, // unmap part of a segment (used with expandable segments) + SNAPSHOT, // a call to snapshot, used to correlate memory snapshots to trace + // events + OOM // the allocator threw an OutOfMemoryError (addr_ is the amount of free + // bytes reported by cuda) + }; + TraceEntry( + Action action, + c10::DeviceIndex device, + size_t addr, + size_t size, + cudaStream_t stream, + MempoolId_t mempool, + approx_time_t time, + std::shared_ptr context = nullptr, + std::string compile_context = "") + : action_(action), + device_(device), + addr_(addr), + context_(std::move(context)), + stream_(stream), + size_(size), + mempool_(std::move(mempool)), + compile_context_(std::move(compile_context)) { + time_.approx_t_ = time; + } + Action action_; + c10::DeviceIndex device_; + size_t addr_; // for OOM, this is the amount of free bytes reported by cuda + std::shared_ptr context_; + cudaStream_t stream_{}; + size_t size_; + MempoolId_t mempool_; + trace_time_ time_{}; + std::string compile_context_{}; +}; + +// Calls made by record_function will save annotations +struct AnnotationEntry { + AnnotationEntry(c10::DeviceIndex device, approx_time_t time) + : device_(device) { + time_.approx_t_ = time; + } + + void recordUserMetadata(const std::string& name, std::string value) { + metadata_[name] = std::move(value); + } + + c10::DeviceIndex device_; + trace_time_ time_{}; + std::unordered_map metadata_; +}; + +struct AllocatorConfigInfo { + double garbage_collection_threshold; + size_t max_split_size; + size_t pinned_num_register_threads; + bool expandable_segments; + bool release_lock_on_malloc; + bool pinned_use_host_register; + std::string last_allocator_settings; + std::vector roundup_power2_divisions; +}; + +struct SnapshotInfo { + std::vector segments; + std::vector> device_traces; + std::vector external_annotations; + AllocatorConfigInfo config_metadata; +}; + +// returns the pointers freed in the pool +// and the pointers allocated. 
Note: a pointer +// may appear in both freed and allocated +struct CheckpointDelta { + std::vector ptrs_freed; + std::vector dataptrs_allocd; +}; + +enum struct RecordContext { + NEVER = 0, + STATE = 1, // only keep stacks for active allocations + ALLOC = 2, // additionally keep stacks for allocations in the trace history + ALL = 3, // additionally record stacks for when something is freed +}; + +using OutOfMemoryObserver = std::function; + +using AllocatorTraceTracker = std::function; + +struct ShareableHandle { + ptrdiff_t offset; + std::string handle; +}; + +class CUDAAllocator : public Allocator { + public: + virtual void* raw_alloc(size_t nbytes) = 0; + virtual void* raw_alloc_with_stream(size_t nbytes, cudaStream_t stream) = 0; + virtual void raw_delete(void* ptr) = 0; + virtual void init(int device_count) = 0; + virtual bool initialized() = 0; + virtual double getMemoryFraction(c10::DeviceIndex device) = 0; + virtual void setMemoryFraction(double fraction, c10::DeviceIndex device) = 0; + virtual void emptyCache(MempoolId_t mempool_id = {0, 0}) = 0; + virtual void enable(bool value) = 0; + virtual bool isEnabled() const = 0; + virtual void cacheInfo(c10::DeviceIndex device, size_t* largestBlock) = 0; + virtual void* getBaseAllocation(void* ptr, size_t* size) = 0; + virtual void recordStream(const DataPtr&, CUDAStream stream) = 0; + virtual c10::CachingDeviceAllocator::DeviceStats getDeviceStats( + c10::DeviceIndex device) = 0; + virtual void resetAccumulatedStats(c10::DeviceIndex device) = 0; + virtual void resetPeakStats(c10::DeviceIndex device) = 0; + virtual SnapshotInfo snapshot(MempoolId_t mempool_id = {0, 0}) = 0; + virtual void beginAllocateToPool( + c10::DeviceIndex device, + MempoolId_t mempool_id, + std::function filter) = 0; + virtual void endAllocateToPool( + c10::DeviceIndex device, + MempoolId_t mempool_id) = 0; + virtual void releasePool(c10::DeviceIndex device, MempoolId_t mempool_id) = 0; + virtual int getPoolUseCount( + c10::DeviceIndex /*device*/, + MempoolId_t /*mempool_id*/) { + TORCH_CHECK( + false, + name(), + " does not yet support getPoolUseCount. " + "If you need it, please file an issue describing your use case."); + } + virtual void createOrIncrefPool( + c10::DeviceIndex /*device*/, + MempoolId_t /*mempool_id*/, + CUDAAllocator* allocator = nullptr) { + TORCH_CHECK( + false, + name(), + " does not yet support createOrIncrefPool. " + "If you need it, please file an issue describing your use case."); + } + virtual void setUseOnOOM(c10::DeviceIndex device, MempoolId_t mempool_id) { + TORCH_CHECK( + false, + name(), + " does not yet support setUseOnOOM. " + "If you need it, please file an issue describing your use case."); + } + + // returns true if the allocated blocks are equal to the expected live + // allocations + virtual bool checkPoolLiveAllocations( + c10::DeviceIndex /*device*/, + MempoolId_t /*mempool_id*/, + const std::unordered_set& /*expected_live_allocations*/) { + TORCH_CHECK( + false, + name(), + " does not yet support checkPoolLiveAllocations. " + "If you need it, please file an issue describing your use case."); + } + virtual ShareableHandle shareIpcHandle(void* ptr) = 0; + virtual std::shared_ptr getIpcDevPtr(std::string handle) = 0; + virtual bool isHistoryEnabled() { + TORCH_CHECK( + false, + name(), + " does not yet support recordHistory. " + "If you need it, please file an issue describing your use case."); + } + virtual void recordHistory( + bool enabled, + CreateContextFn context_recorder, + size_t alloc_trace_max_entries, + RecordContext when, + bool clearHistory) = 0; + virtual void recordAnnotation( + const std::vector>& /*md*/) {} + virtual void pushCompileContext(std::string& md) {} + virtual void popCompileContext() {} + virtual void attachOutOfMemoryObserver(OutOfMemoryObserver observer) = 0; + + // Attached AllocatorTraceTracker callbacks will be called while the + // per-device allocator lock is held. Any additional locks taken from within + // the callback must be shown to always follow a lock order that never + // triggers a deadlock. In particular, Python's GIL may be held when + // calling the allocator, so it is unsafe to try to acquire the GIL in this + // callback. + virtual void attachAllocatorTraceTracker(AllocatorTraceTracker tracker) = 0; + + virtual void enablePeerAccess( + c10::DeviceIndex dev, + c10::DeviceIndex dev_to_access) = 0; + + // memory not allocated from cudaMalloc cannot be copied + // across devices using cudaMemcpyAsync if peer to peer access is disabled. + // Instead it requires cudaMemcpyAsyncPeer + // with P2P Enabled, all combinations work + // with P2P Disabled: + // cudaMalloc cudaMallocAsync/cuMemMap + // cudaMemcpyAsyncPeer works works + // cudaMemcpyAsync works error + + // This function chooses whether to use the Peer version of + // memcpy, based on where dst/src were allocated. + virtual cudaError_t memcpyAsync( + void* dst, + int dstDevice, + const void* src, + int srcDevice, + size_t count, + cudaStream_t stream, + bool p2p_enabled) = 0; + virtual std::shared_ptr getCheckpointState( + c10::DeviceIndex device, + MempoolId_t id) = 0; + virtual CheckpointDelta setCheckpointPoolState( + c10::DeviceIndex device, + std::shared_ptr pps) = 0; + virtual std::string name() = 0; +}; + +// Allocator object, statically initialized +// See BackendInitializer in CUDACachingAllocator.cpp. +// Atomic loads on x86 are just normal loads, +// (atomic stores are different), so reading this value +// is no different than loading a pointer. +C10_CUDA_API extern std::atomic allocator; + +inline CUDAAllocator* get() { + return allocator.load(); +} + +// Called directly by clients.
+inline void* raw_alloc(size_t nbytes) { + return get()->raw_alloc(nbytes); +} + +inline void* raw_alloc_with_stream(size_t nbytes, cudaStream_t stream) { + return get()->raw_alloc_with_stream(nbytes, stream); +} + +inline void raw_delete(void* ptr) { + return get()->raw_delete(ptr); +} + +inline void init(int device_count) { + return get()->init(device_count); +} + +inline double getMemoryFraction(c10::DeviceIndex device) { + return get()->getMemoryFraction(device); +} + +inline void setMemoryFraction(double fraction, c10::DeviceIndex device) { + return get()->setMemoryFraction(fraction, device); +} + +inline void emptyCache(MempoolId_t mempool_id = {0, 0}) { + return get()->emptyCache(mempool_id); +} + +inline void enable(bool value) { + return get()->enable(value); +} + +inline bool isEnabled() { + return get()->isEnabled(); +} + +inline void cacheInfo(c10::DeviceIndex device, size_t* largestBlock) { + return get()->cacheInfo(device, largestBlock); +} + +inline void* getBaseAllocation(void* ptr, size_t* size) { + return get()->getBaseAllocation(ptr, size); +} + +inline void recordStream(const DataPtr& dataPtr, CUDAStream stream) { + return get()->recordStream(dataPtr, stream); +} + +inline c10::CachingDeviceAllocator::DeviceStats getDeviceStats( + c10::DeviceIndex device) { + return get()->getDeviceStats(device); +} + +inline void resetAccumulatedStats(c10::DeviceIndex device) { + return get()->resetAccumulatedStats(device); +} + +inline void resetPeakStats(c10::DeviceIndex device) { + return get()->resetPeakStats(device); +} + +inline SnapshotInfo snapshot(MempoolId_t mempool_id = {0, 0}) { + return get()->snapshot(mempool_id); +} + +inline std::shared_ptr getCheckpointState( + c10::DeviceIndex device, + MempoolId_t id) { + return get()->getCheckpointState(device, id); +} + +inline CheckpointDelta setCheckpointPoolState( + c10::DeviceIndex device, + std::shared_ptr pps) { + return get()->setCheckpointPoolState(device, std::move(pps)); +} + +// CUDAGraph interactions +inline void beginAllocateToPool( + c10::DeviceIndex device, + MempoolId_t mempool_id, + std::function filter) { + get()->beginAllocateToPool(device, mempool_id, std::move(filter)); +} + +inline void endAllocateToPool(c10::DeviceIndex device, MempoolId_t mempool_id) { + get()->endAllocateToPool(device, mempool_id); +} + +inline void recordHistory( + bool enabled, + CreateContextFn context_recorder, + size_t alloc_trace_max_entries, + RecordContext when, + bool clearHistory) { + return get()->recordHistory( + enabled, context_recorder, alloc_trace_max_entries, when, clearHistory); +} + +inline void recordAnnotation( + const std::vector>& md) { + return get()->recordAnnotation(md); +} + +inline void pushCompileContext(std::string& md) { + return get()->pushCompileContext(md); +} + +inline void popCompileContext() { + return get()->popCompileContext(); +} + +inline bool isHistoryEnabled() { + return get()->isHistoryEnabled(); +} + +inline bool checkPoolLiveAllocations( + c10::DeviceIndex device, + MempoolId_t mempool_id, + const std::unordered_set& expected_live_allocations) { + return get()->checkPoolLiveAllocations( + device, mempool_id, expected_live_allocations); +} + +inline void attachOutOfMemoryObserver(OutOfMemoryObserver observer) { + return get()->attachOutOfMemoryObserver(std::move(observer)); +} + +inline void attachAllocatorTraceTracker(AllocatorTraceTracker tracker) { + return get()->attachAllocatorTraceTracker(std::move(tracker)); +} + +inline void releasePool(c10::DeviceIndex device, MempoolId_t mempool_id) { + 
return get()->releasePool(device, mempool_id); +} +inline void createOrIncrefPool( + c10::DeviceIndex device, + MempoolId_t mempool_id, + CUDAAllocator* allocator_ptr = nullptr) { + get()->createOrIncrefPool(device, mempool_id, allocator_ptr); +} +inline void setUseOnOOM(c10::DeviceIndex device, MempoolId_t mempool_id) { + get()->setUseOnOOM(device, mempool_id); +} + +inline int getPoolUseCount(c10::DeviceIndex device, MempoolId_t mempool_id) { + return get()->getPoolUseCount(device, mempool_id); +} + +// Not part of CUDA_ALLOCATOR_BACKEND_INTERFACE +inline std::shared_ptr getIpcDevPtr(std::string handle) { + return get()->getIpcDevPtr(std::move(handle)); +} + +inline ShareableHandle shareIpcHandle(void* ptr) { + return get()->shareIpcHandle(ptr); +} + +inline std::string name() { + return get()->name(); +} + +inline cudaError_t memcpyAsync( + void* dst, + int dstDevice, + const void* src, + int srcDevice, + size_t count, + cudaStream_t stream, + bool p2p_enabled) { + return get()->memcpyAsync( + dst, dstDevice, src, srcDevice, count, stream, p2p_enabled); +} + +inline void enablePeerAccess( + c10::DeviceIndex dev, + c10::DeviceIndex dev_to_access) { + return get()->enablePeerAccess(dev, dev_to_access); +} + +} // namespace c10::cuda::CUDACachingAllocator + +namespace c10::cuda { + +// MemPool represents a pool of memory in a caching allocator. Currently, +// it's just the ID of the pool object maintained in the CUDACachingAllocator. +// +// An allocator pointer can be passed to the MemPool to define how the +// allocations should be done in the pool. For example: using a different +// system allocator such as ncclMemAlloc. +struct C10_CUDA_API MemPool { + MemPool( + CUDACachingAllocator::CUDAAllocator* allocator = nullptr, + bool is_user_created = true, + bool use_on_oom = false, + bool symmetric = false); + MemPool(const MemPool&) = delete; + MemPool(MemPool&&) = default; + MemPool& operator=(const MemPool&) = delete; + MemPool& operator=(MemPool&&) = default; + ~MemPool(); + + MempoolId_t id(); + bool is_symmetric(); + CUDACachingAllocator::CUDAAllocator* allocator(); + int use_count(); + c10::DeviceIndex device(); + static MempoolId_t graph_pool_handle(bool is_user_created = true); + + private: + static std::atomic uid_; + static std::atomic uuid_; + CUDACachingAllocator::CUDAAllocator* allocator_; + bool is_user_created_; + MempoolId_t id_; + bool symmetric_; + c10::DeviceIndex device_; +}; + +} // namespace c10::cuda diff --git a/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDADeviceAssertion.h b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDADeviceAssertion.h new file mode 100644 index 0000000000000000000000000000000000000000..c266540a6bc551b9b2fbdbc22a7c26e89c5ac9e8 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDADeviceAssertion.h @@ -0,0 +1,98 @@ +#pragma once + +#include +#include + +namespace c10::cuda { + +#ifdef TORCH_USE_CUDA_DSA +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-function") +// Copy string from `src` to `dst` +static __device__ void dstrcpy(char* dst, const char* src) { + int i = 0; + // Copy string from source to destination, ensuring that it + // isn't longer than `C10_CUDA_DSA_MAX_STR_LEN-1` + while (*src != '\0' && i++ < C10_CUDA_DSA_MAX_STR_LEN - 1) { + *dst++ = *src++; + } + *dst = '\0'; +} + +static __device__ void dsa_add_new_assertion_failure( + DeviceAssertionsData* assertions_data, + const char* assertion_msg, + const char* filename, + const char* function_name, + const int line_number, + const uint32_t 
caller,
+    const dim3 block_id,
+    const dim3 thread_id) {
+  // `assertions_data` may be nullptr if device-side assertion checking
+  // is disabled at run-time. If it is disabled at compile time this
+  // function will never be called
+  if (!assertions_data) {
+    return;
+  }
+
+  // Atomically increment so other threads can fail at the same time
+  // Note that incrementing this means that the CPU can observe that
+  // a failure has happened and can begin to respond before we've
+  // written information about that failure out to the buffer.
+  const auto nid = atomicAdd(&(assertions_data->assertion_count), 1);
+
+  if (nid >= C10_CUDA_DSA_ASSERTION_COUNT) {
+    // At this point we've run out of assertion buffer space.
+    // We could print a message about this, but that'd get
+    // spammy if a lot of threads did it, so we just silently
+    // ignore any other assertion failures. In most cases the
+    // failures will all probably be analogous anyway.
+    return;
+  }
+
+  // Write information about the assertion failure to memory.
+  // Note that this occurs only after the `assertion_count`
+  // increment broadcasts that there's been a problem.
+  auto& self = assertions_data->assertions[nid];
+  dstrcpy(self.assertion_msg, assertion_msg);
+  dstrcpy(self.filename, filename);
+  dstrcpy(self.function_name, function_name);
+  self.line_number = line_number;
+  self.caller = caller;
+  self.block_id[0] = block_id.x;
+  self.block_id[1] = block_id.y;
+  self.block_id[2] = block_id.z;
+  self.thread_id[0] = thread_id.x;
+  self.thread_id[1] = thread_id.y;
+  self.thread_id[2] = thread_id.z;
+}
+C10_CLANG_DIAGNOSTIC_POP()
+
+// Emulates a kernel assertion. The assertion won't stop the kernel's progress,
+// so you should assume everything the kernel produces is garbage if there's an
+// assertion failure.
+// NOTE: This assumes that `assertions_data` and `assertion_caller_id` are
+// arguments of the kernel and therefore accessible.
+#define CUDA_KERNEL_ASSERT2(condition)                                   \
+  do {                                                                   \
+    if (C10_UNLIKELY(!(condition))) {                                    \
+      /* Has an atomic element so threads can fail at the same time */   \
+      c10::cuda::dsa_add_new_assertion_failure(                          \
+          assertions_data,                                               \
+          C10_STRINGIZE(condition),                                      \
+          __FILE__,                                                      \
+          __FUNCTION__,                                                  \
+          __LINE__,                                                      \
+          assertion_caller_id,                                           \
+          blockIdx,                                                      \
+          threadIdx);                                                    \
+      /* Now that the kernel has failed we early exit the kernel, but */ \
+      /* otherwise keep going and rely on the host to check UVM and */   \
+      /* determine we've had a problem */                                \
+      return;                                                            \
+    }                                                                    \
+  } while (false)
+#else
+#define CUDA_KERNEL_ASSERT2(condition) assert(condition)
+#endif
+
+} // namespace c10::cuda
diff --git a/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDADeviceAssertionHost.h b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDADeviceAssertionHost.h
new file mode 100644
index 0000000000000000000000000000000000000000..d1f7571c00289c98292c7de209f6af117083d88e
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDADeviceAssertionHost.h
@@ -0,0 +1,164 @@
+#pragma once
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef USE_CUDA
+#define TORCH_USE_CUDA_DSA
+#endif
+
+/// Number of assertion failure messages we can store. If this is too small
+/// threads will fail silently.
+constexpr int C10_CUDA_DSA_ASSERTION_COUNT = 10;
+constexpr int C10_CUDA_DSA_MAX_STR_LEN = 512;
+
+namespace c10::cuda {
+
+/// Holds information about any device-side assertions that fail.
+/// Held in managed memory and accessed by both the CPU and the GPU.
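+/// A rough usage sketch (assuming the kernel was launched with
+/// TORCH_DSA_KERNEL_LAUNCH, defined later in this header set, so that
+/// `assertions_data` and `assertion_caller_id` are in scope):
+///   CUDA_KERNEL_ASSERT2(idx < n);  // on failure, appends one entry here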
+struct DeviceAssertionData { + /// Stringification of the assertion + // NOLINTNEXTLINE(*-c-arrays) + char assertion_msg[C10_CUDA_DSA_MAX_STR_LEN]{}; + /// File the assertion was in + // NOLINTNEXTLINE(*-c-arrays) + char filename[C10_CUDA_DSA_MAX_STR_LEN]{}; + /// Name of the function the assertion was in + // NOLINTNEXTLINE(*-c-arrays) + char function_name[C10_CUDA_DSA_MAX_STR_LEN]{}; + /// Line number the assertion was at + int line_number{}; + /// Number uniquely identifying the kernel launch that triggered the assertion + uint32_t caller{}; + /// block_id of the thread that failed the assertion + // NOLINTNEXTLINE(*-c-arrays) + int32_t block_id[3]{}; + /// third_id of the thread that failed the assertion + // NOLINTNEXTLINE(*-c-arrays) + int32_t thread_id[3]{}; +}; + +/// Used to hold assertions generated by the device +/// Held in managed memory and access by both the CPU and the GPU. +struct DeviceAssertionsData { + /// Total number of assertions found; a subset of these will be recorded + /// in `assertions` + int32_t assertion_count{}; + /// An array of assertions that will be written to in a race-free manner + // NOLINTNEXTLINE(*-c-arrays) + DeviceAssertionData assertions[C10_CUDA_DSA_ASSERTION_COUNT]{}; +}; + +/// Use to hold info about kernel launches so that we can run kernels +/// asynchronously and still associate launches with device-side +/// assertion failures +struct CUDAKernelLaunchInfo { + /// Filename of the code where the kernel was launched from + const char* launch_filename; + /// Function from which the kernel was launched + const char* launch_function; + /// Line number of where the code was launched from + uint32_t launch_linenum; + /// Backtrace of where the kernel was launched from, only populated if + /// CUDAKernelLaunchRegistry::gather_launch_stacktrace is True + std::string launch_stacktrace; + /// Kernel that was launched + const char* kernel_name; + /// Device the kernel was launched on + int device; + /// Stream the kernel was launched on + int32_t stream; + /// A number that uniquely identifies the kernel launch + uint64_t generation_number; +}; + +/// Circular buffer used to hold information about kernel launches +/// this is later used to reconstruct how a device-side kernel assertion failure +/// occurred CUDAKernelLaunchRegistry is used as a singleton +class C10_CUDA_API CUDAKernelLaunchRegistry { + private: + /// Assume that this is the max number of kernel launches that might ever be + /// enqueued across all streams on a single device + static constexpr int max_kernel_launches = 1024; + /// How many kernel launch infos we've inserted. Used to ensure that circular + /// queue doesn't provide false information by always increasing, but also to + /// mark where we are inserting into the queue +#ifdef TORCH_USE_CUDA_DSA + uint64_t generation_number = 0; +#endif + /// Shared mutex between writer and accessor to ensure multi-threaded safety. + mutable std::mutex read_write_mutex; + /// Used to ensure prevent race conditions in GPU memory allocation + mutable std::mutex gpu_alloc_mutex; + /// Pointer to managed memory keeping track of device-side assertions. There + /// is one entry for each possible device the process might work with. Unused + /// entries are nullptrs. 
We could also use an unordered_set here, but this + /// vector design will be faster and the wasted memory is small since we + /// expect the number of GPUs per node will always be small + std::vector< + std::unique_ptr> + uvm_assertions; + /// A single circular buffer holds information about every kernel launch the + /// process makes across all devices. + std::vector kernel_launches; + bool check_env_for_enable_launch_stacktracing() const; + bool check_env_for_dsa_enabled() const; + + public: + CUDAKernelLaunchRegistry(); + /// Register a new kernel launch and obtain a generation number back to be + /// passed to the kernel + uint32_t insert( + const char* launch_filename, + const char* launch_function, + const uint32_t launch_linenum, + const char* kernel_name, + const int32_t stream_id); + /// Get copies of the kernel launch registry and each device's assertion + /// failure buffer so they can be inspected without raising race conditions + std:: + pair, std::vector> + snapshot() const; + /// Get a pointer to the current device's assertion failure buffer. If no such + /// buffer exists then one is created. This means that the first kernel launch + /// made on each device will be slightly slower because memory allocations are + /// required + DeviceAssertionsData* get_uvm_assertions_ptr_for_current_device(); + /// Gets the global singleton of the registry + static CUDAKernelLaunchRegistry& get_singleton_ref(); + /// If not all devices support DSA, we disable it + const bool do_all_devices_support_managed_memory = false; + /// Whether or not to gather stack traces when launching kernels + bool gather_launch_stacktrace = false; + /// Whether or not host-side DSA is enabled or disabled at run-time + /// Note: Device-side code cannot be enabled/disabled at run-time + bool enabled_at_runtime = false; + /// Whether or not a device has indicated a failure + bool has_failed() const; +#ifdef TORCH_USE_CUDA_DSA + const bool enabled_at_compile_time = true; +#else + const bool enabled_at_compile_time = false; +#endif +}; + +C10_CUDA_API std::string c10_retrieve_device_side_assertion_info(); + +} // namespace c10::cuda + +// Each kernel launched with TORCH_DSA_KERNEL_LAUNCH +// requires the same input arguments. We introduce the following macro to +// standardize these. +#define TORCH_DSA_KERNEL_ARGS \ + [[maybe_unused]] c10::cuda::DeviceAssertionsData *const assertions_data, \ + [[maybe_unused]] uint32_t assertion_caller_id + +// This macro can be used to pass the DSA arguments onward to another +// function +#define TORCH_DSA_KERNEL_ARGS_PASS assertions_data, assertion_caller_id diff --git a/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAException.h b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAException.h new file mode 100644 index 0000000000000000000000000000000000000000..b61440256f54938832edf63ac6a89c899ca7674f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAException.h @@ -0,0 +1,97 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +// Note [CHECK macro] +// ~~~~~~~~~~~~~~~~~~ +// This is a macro so that AT_ERROR can get accurate __LINE__ +// and __FILE__ information. We could split this into a short +// macro and a function implementation if we pass along __LINE__ +// and __FILE__, but no one has found this worth doing. + +// Used to denote errors from CUDA framework. +// This needs to be declared here instead util/Exception.h for proper conversion +// during hipify. 
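+//
+// The check macros below are typically wrapped around every CUDA runtime
+// call so that failures surface as c10::CUDAError with file/line context
+// (a sketch):
+//   C10_CUDA_CHECK(cudaMemsetAsync(ptr, 0, nbytes, stream));
+// Kernel launches are then followed by C10_CUDA_KERNEL_LAUNCH_CHECK().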
+namespace c10 { +class C10_CUDA_API CUDAError : public c10::Error { + using Error::Error; +}; +} // namespace c10 + +#define C10_CUDA_CHECK(EXPR) \ + do { \ + const cudaError_t __err = EXPR; \ + c10::cuda::c10_cuda_check_implementation( \ + static_cast(__err), \ + __FILE__, \ + __func__, /* Line number data type not well-defined between \ + compilers, so we perform an explicit cast */ \ + static_cast(__LINE__), \ + true); \ + } while (0) + +#define C10_CUDA_CHECK_WARN(EXPR) \ + do { \ + const cudaError_t __err = EXPR; \ + if (C10_UNLIKELY(__err != cudaSuccess)) { \ + [[maybe_unused]] auto error_unused = cudaGetLastError(); \ + TORCH_WARN("CUDA warning: ", cudaGetErrorString(__err)); \ + } \ + } while (0) + +// Indicates that a CUDA error is handled in a non-standard way +#define C10_CUDA_ERROR_HANDLED(EXPR) EXPR + +// Intentionally ignore a CUDA error +#define C10_CUDA_IGNORE_ERROR(EXPR) \ + do { \ + const cudaError_t __err = EXPR; \ + if (C10_UNLIKELY(__err != cudaSuccess)) { \ + [[maybe_unused]] cudaError_t error_unused = cudaGetLastError(); \ + } \ + } while (0) + +// Clear the last CUDA error +#define C10_CUDA_CLEAR_ERROR() \ + do { \ + [[maybe_unused]] cudaError_t error_unused = cudaGetLastError(); \ + } while (0) + +// This should be used directly after every kernel launch to ensure +// the launch happened correctly and provide an early, close-to-source +// diagnostic if it didn't. +#define C10_CUDA_KERNEL_LAUNCH_CHECK() C10_CUDA_CHECK(cudaGetLastError()) + +/// Launches a CUDA kernel appending to it all the information need to handle +/// device-side assertion failures. Checks that the launch was successful. +#define TORCH_DSA_KERNEL_LAUNCH( \ + kernel, blocks, threads, shared_mem, stream, ...) \ + do { \ + auto& launch_registry = \ + c10::cuda::CUDAKernelLaunchRegistry::get_singleton_ref(); \ + kernel<<>>( \ + __VA_ARGS__, \ + launch_registry.get_uvm_assertions_ptr_for_current_device(), \ + launch_registry.insert( \ + __FILE__, __FUNCTION__, __LINE__, #kernel, stream.id())); \ + C10_CUDA_KERNEL_LAUNCH_CHECK(); \ + } while (0) + +namespace c10::cuda { + +/// In the event of a CUDA failure, formats a nice error message about that +/// failure and also checks for device-side assertion failures +C10_CUDA_API void c10_cuda_check_implementation( + const int32_t err, + const char* filename, + const char* function_name, + const int line_number, + const bool include_device_assertions); + +} // namespace c10::cuda diff --git a/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAFunctions.h b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAFunctions.h new file mode 100644 index 0000000000000000000000000000000000000000..e2a44bf17f00ec29f10244c9e9b8430b0598d83e --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAFunctions.h @@ -0,0 +1,117 @@ +#pragma once + +// This header provides C++ wrappers around commonly used CUDA API functions. +// The benefit of using C++ here is that we can raise an exception in the +// event of an error, rather than explicitly pass around error codes. This +// leads to more natural APIs. +// +// The naming convention used here matches the naming convention of torch.cuda + +#include +#include +#include +#include +#include +namespace c10::cuda { + +// NB: In the past, we were inconsistent about whether or not this reported +// an error if there were driver problems are not. Based on experience +// interacting with users, it seems that people basically ~never want this +// function to fail; it should just return zero if things are not working. 
+// Oblige them.
+// It still might log a warning for the user the first time it's invoked.
+C10_CUDA_API DeviceIndex device_count() noexcept;
+
+// Version of device_count that throws if no devices are detected
+C10_CUDA_API DeviceIndex device_count_ensure_non_zero();
+
+C10_CUDA_API DeviceIndex current_device();
+
+C10_CUDA_API void set_device(DeviceIndex device, const bool force = false);
+
+C10_CUDA_API void device_synchronize();
+
+C10_CUDA_API void warn_or_error_on_sync();
+
+// Raw CUDA device management functions
+C10_CUDA_API cudaError_t GetDeviceCount(int* dev_count);
+
+C10_CUDA_API cudaError_t GetDevice(DeviceIndex* device);
+
+C10_CUDA_API cudaError_t
+SetDevice(DeviceIndex device, const bool force = false);
+
+C10_CUDA_API cudaError_t MaybeSetDevice(DeviceIndex device);
+
+C10_CUDA_API DeviceIndex ExchangeDevice(DeviceIndex device);
+
+C10_CUDA_API DeviceIndex MaybeExchangeDevice(DeviceIndex device);
+
+C10_CUDA_API void SetTargetDevice();
+
+enum class SyncDebugMode { L_DISABLED = 0, L_WARN, L_ERROR };
+
+// this is a holder for c10 global state (similar to at GlobalContext)
+// currently it's used to store cuda synchronization warning state,
+// but can be expanded to hold other related global state, e.g. to
+// record stream usage
+class WarningState {
+ public:
+  void set_sync_debug_mode(SyncDebugMode l) {
+    sync_debug_mode = l;
+  }
+
+  SyncDebugMode get_sync_debug_mode() {
+    return sync_debug_mode;
+  }
+
+ private:
+  SyncDebugMode sync_debug_mode = SyncDebugMode::L_DISABLED;
+};
+
+C10_CUDA_API __inline__ WarningState& warning_state() {
+  static WarningState warning_state_;
+  return warning_state_;
+}
+// the subsequent functions are defined in the header because for performance
+// reasons we want them to be inline
+C10_CUDA_API void __inline__ memcpy_and_sync(
+    void* dst,
+    const void* src,
+    int64_t nbytes,
+    cudaMemcpyKind kind,
+    cudaStream_t stream) {
+  if (C10_UNLIKELY(
+          warning_state().get_sync_debug_mode() != SyncDebugMode::L_DISABLED)) {
+    warn_or_error_on_sync();
+  }
+  const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace();
+  if (C10_UNLIKELY(interp)) {
+    (*interp)->trace_gpu_stream_synchronization(
+        c10::kCUDA, reinterpret_cast<uintptr_t>(stream));
+  }
+#if defined(TORCH_HIP_VERSION) && (TORCH_HIP_VERSION >= 301)
+  C10_CUDA_CHECK(hipMemcpyWithStream(dst, src, nbytes, kind, stream));
+#else
+  C10_CUDA_CHECK(cudaMemcpyAsync(dst, src, nbytes, kind, stream));
+  C10_CUDA_CHECK(cudaStreamSynchronize(stream));
+#endif
+}
+
+C10_CUDA_API void __inline__ stream_synchronize(cudaStream_t stream) {
+  if (C10_UNLIKELY(
+          warning_state().get_sync_debug_mode() != SyncDebugMode::L_DISABLED)) {
+    warn_or_error_on_sync();
+  }
+  const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace();
+  if (C10_UNLIKELY(interp)) {
+    (*interp)->trace_gpu_stream_synchronization(
+        c10::kCUDA, reinterpret_cast<uintptr_t>(stream));
+  }
+  C10_CUDA_CHECK(cudaStreamSynchronize(stream));
+}
+
+C10_CUDA_API bool hasPrimaryContext(DeviceIndex device_index);
+C10_CUDA_API std::optional<DeviceIndex> getDeviceIndexWithPrimaryContext();
+
+} // namespace c10::cuda
diff --git a/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAGraphsC10Utils.h b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAGraphsC10Utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..5a34422b2e8e222c0dae322776fe9e3ec5e87cfb
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAGraphsC10Utils.h
@@ -0,0 +1,82 @@
+#pragma once
+
+#include
+#include
+#include
+
+// CUDA Graphs utils used by c10
and aten. +// aten/cuda/CUDAGraphsUtils.cuh adds utils used by aten only. + +namespace c10::cuda { + +using CaptureId_t = unsigned long long; + +// first is set if the instance is created by CUDAGraph::capture_begin. +// second is set if the instance is created by at::cuda::graph_pool_handle. +using MempoolId_t = std::pair; + +// RAII guard for "cudaStreamCaptureMode", a thread-local value +// that controls the error-checking strictness of a capture. +struct C10_CUDA_API CUDAStreamCaptureModeGuard { + CUDAStreamCaptureModeGuard(cudaStreamCaptureMode desired) + : strictness_(desired) { + C10_CUDA_CHECK(cudaThreadExchangeStreamCaptureMode(&strictness_)); + } + CUDAStreamCaptureModeGuard(const CUDAStreamCaptureModeGuard&) = delete; + CUDAStreamCaptureModeGuard(CUDAStreamCaptureModeGuard&&) = delete; + CUDAStreamCaptureModeGuard& operator=(const CUDAStreamCaptureModeGuard&) = + delete; + CUDAStreamCaptureModeGuard& operator=(CUDAStreamCaptureModeGuard&&) = delete; + ~CUDAStreamCaptureModeGuard() { + C10_CUDA_CHECK_WARN(cudaThreadExchangeStreamCaptureMode(&strictness_)); + } + + private: + cudaStreamCaptureMode strictness_; +}; + +// Protects against enum cudaStreamCaptureStatus implementation changes. +// Some compilers seem not to like static_assert without the messages. +static_assert( + int(cudaStreamCaptureStatus::cudaStreamCaptureStatusNone) == 0, + "unexpected int(cudaStreamCaptureStatusNone) value"); +static_assert( + int(cudaStreamCaptureStatus::cudaStreamCaptureStatusActive) == 1, + "unexpected int(cudaStreamCaptureStatusActive) value"); +static_assert( + int(cudaStreamCaptureStatus::cudaStreamCaptureStatusInvalidated) == 2, + "unexpected int(cudaStreamCaptureStatusInvalidated) value"); + +enum class CaptureStatus : int { + None = int(cudaStreamCaptureStatus::cudaStreamCaptureStatusNone), + Active = int(cudaStreamCaptureStatus::cudaStreamCaptureStatusActive), + Invalidated = int(cudaStreamCaptureStatus::cudaStreamCaptureStatusInvalidated) +}; + +inline std::ostream& operator<<(std::ostream& os, CaptureStatus status) { + switch (status) { + case CaptureStatus::None: + os << "cudaStreamCaptureStatusNone"; + break; + case CaptureStatus::Active: + os << "cudaStreamCaptureStatusActive"; + break; + case CaptureStatus::Invalidated: + os << "cudaStreamCaptureStatusInvalidated"; + break; + default: + TORCH_INTERNAL_ASSERT( + false, "Unknown CUDA graph CaptureStatus", int(status)); + } + return os; +} + +// Use this version where you're sure a CUDA context exists already. +inline CaptureStatus currentStreamCaptureStatusMayInitCtx() { + cudaStreamCaptureStatus is_capturing{cudaStreamCaptureStatusNone}; + C10_CUDA_CHECK( + cudaStreamIsCapturing(c10::cuda::getCurrentCUDAStream(), &is_capturing)); + return CaptureStatus(is_capturing); +} + +} // namespace c10::cuda diff --git a/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAGuard.h b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAGuard.h new file mode 100644 index 0000000000000000000000000000000000000000..9b8e00dbaef6002870586a6361286950a5c5d69d --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAGuard.h @@ -0,0 +1,306 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace c10::cuda { + +// This code is kind of boilerplatey. See Note [Whither the DeviceGuard +// boilerplate] + +/// A variant of DeviceGuard that is specialized for CUDA. 
It accepts +/// integer indices (interpreting them as CUDA devices) and is a little +/// more efficient than DeviceGuard (it compiles to straight line +/// cudaSetDevice/cudaGetDevice calls); however, it can only be used +/// from code that links against CUDA directly. +struct CUDAGuard { + /// No default constructor; see Note [Omitted default constructor from RAII] + explicit CUDAGuard() = delete; + + /// Set the current CUDA device to the passed device index. + explicit CUDAGuard(DeviceIndex device_index) : guard_(device_index) {} + + /// Sets the current CUDA device to the passed device. Errors if the passed + /// device is not a CUDA device. + explicit CUDAGuard(Device device) : guard_(device) {} + + // Copy is not allowed + CUDAGuard(const CUDAGuard&) = delete; + CUDAGuard& operator=(const CUDAGuard&) = delete; + + // Move is not allowed (there is no uninitialized state) + CUDAGuard(CUDAGuard&& other) = delete; + CUDAGuard& operator=(CUDAGuard&& other) = delete; + ~CUDAGuard() = default; + + /// Sets the CUDA device to the given device. Errors if the given device + /// is not a CUDA device. + void set_device(Device device) { + guard_.set_device(device); + } + + /// Sets the CUDA device to the given device. Errors if the given device + /// is not a CUDA device. (This method is provided for uniformity with + /// DeviceGuard). + void reset_device(Device device) { + guard_.reset_device(device); + } + + /// Sets the CUDA device to the given device index. + void set_index(DeviceIndex device_index) { + guard_.set_index(device_index); + } + + /// Returns the device that was set upon construction of the guard + Device original_device() const { + return guard_.original_device(); + } + + /// Returns the last device that was set via `set_device`, if any, otherwise + /// the device passed during construction. + Device current_device() const { + return guard_.current_device(); + } + + private: + /// The guard for the current device. + c10::impl::InlineDeviceGuard guard_; +}; + +/// A variant of OptionalDeviceGuard that is specialized for CUDA. See +/// CUDAGuard for when you can use this. +struct OptionalCUDAGuard { + /// Create an uninitialized OptionalCUDAGuard. + explicit OptionalCUDAGuard() = default; + + /// Set the current CUDA device to the passed Device, if it is not nullopt. + explicit OptionalCUDAGuard(std::optional device_opt) + : guard_(device_opt) {} + + /// Set the current CUDA device to the passed device index, if it is not + /// nullopt + explicit OptionalCUDAGuard(std::optional device_index_opt) + : guard_(device_index_opt) {} + + // Copy is not allowed + OptionalCUDAGuard(const OptionalCUDAGuard&) = delete; + OptionalCUDAGuard& operator=(const OptionalCUDAGuard&) = delete; + + // See Note [Move construction for RAII guards is tricky] + OptionalCUDAGuard(OptionalCUDAGuard&& other) = delete; + + // See Note [Move assignment for RAII guards is tricky] + OptionalCUDAGuard& operator=(OptionalCUDAGuard&& other) = delete; + ~OptionalCUDAGuard() = default; + + /// Sets the CUDA device to the given device, initializing the guard if it + /// is not already initialized. Errors if the given device is not a CUDA + /// device. + void set_device(Device device) { + guard_.set_device(device); + } + + /// Sets the CUDA device to the given device, initializing the guard if it is + /// not already initialized. Errors if the given device is not a CUDA device. + /// (This method is provided for uniformity with OptionalDeviceGuard). 
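+  /// A sketch of typical use (names here are illustrative only):
+  ///   OptionalCUDAGuard g;                     // uninitialized, no change
+  ///   if (maybe_device) g.reset_device(*maybe_device);
+  ///   // original device restored at scope exit, if g was initialized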
+ void reset_device(Device device) { + guard_.reset_device(device); + } + + /// Sets the CUDA device to the given device index, initializing the guard if + /// it is not already initialized. + void set_index(DeviceIndex device_index) { + guard_.set_index(device_index); + } + + /// Returns the device that was set immediately prior to initialization of the + /// guard, or nullopt if the guard is uninitialized. + std::optional original_device() const { + return guard_.original_device(); + } + + /// Returns the most recent device that was set using this device guard, + /// either from construction, or via set_device, if the guard is initialized, + /// or nullopt if the guard is uninitialized. + std::optional current_device() const { + return guard_.current_device(); + } + + /// Restore the original CUDA device, resetting this guard to uninitialized + /// state. + void reset() { + guard_.reset(); + } + + private: + c10::impl::InlineOptionalDeviceGuard guard_; +}; + +/// A variant of StreamGuard that is specialized for CUDA. See CUDAGuard +/// for when you can use this. +struct CUDAStreamGuard { + /// No default constructor, see Note [Omitted default constructor from RAII] + explicit CUDAStreamGuard() = delete; + + /// Set the current CUDA device to the device associated with the passed + /// stream, and set the current CUDA stream on that device to the passed + /// stream. Errors if the Stream is not a CUDA stream. + explicit CUDAStreamGuard(Stream stream) : guard_(stream) {} + ~CUDAStreamGuard() = default; + + /// Copy is disallowed + CUDAStreamGuard(const CUDAStreamGuard&) = delete; + CUDAStreamGuard& operator=(const CUDAStreamGuard&) = delete; + + /// Move is disallowed, as CUDAStreamGuard does not have an uninitialized + /// state, which is required for moves on types with nontrivial destructors. + CUDAStreamGuard(CUDAStreamGuard&& other) = delete; + CUDAStreamGuard& operator=(CUDAStreamGuard&& other) = delete; + + /// Resets the currently set stream to the original stream and + /// the currently set device to the original device. Then, + /// set the current device to the device associated with the passed stream, + /// and set the current stream on that device to the passed stream. + /// Errors if the stream passed is not a CUDA stream. + /// + /// NOTE: this implementation may skip some stream/device setting if + /// it can prove that it is unnecessary. + /// + /// WARNING: reset_stream does NOT preserve previously set streams on + /// different devices. If you need to set streams on multiple devices + /// on CUDA, use CUDAMultiStreamGuard instead. + void reset_stream(Stream stream) { + guard_.reset_stream(stream); + } + + /// Returns the CUDA stream that was set at the time the guard was + /// constructed. + CUDAStream original_stream() const { + return CUDAStream(CUDAStream::UNCHECKED, guard_.original_stream()); + } + + /// Returns the most recent CUDA stream that was set using this device guard, + /// either from construction, or via set_stream. + CUDAStream current_stream() const { + return CUDAStream(CUDAStream::UNCHECKED, guard_.current_stream()); + } + + /// Returns the most recent CUDA device that was set using this device guard, + /// either from construction, or via set_device/reset_device/set_index. + Device current_device() const { + return guard_.current_device(); + } + + /// Returns the CUDA device that was set at the most recent reset_stream(), + /// or otherwise the device at construction time. 
+ Device original_device() const { + return guard_.original_device(); + } + + private: + c10::impl::InlineStreamGuard guard_; +}; + +/// A variant of OptionalStreamGuard that is specialized for CUDA. See +/// CUDAGuard for when you can use this. +struct OptionalCUDAStreamGuard { + /// Create an uninitialized guard. + explicit OptionalCUDAStreamGuard() = default; + + /// Set the current CUDA device to the device associated with the passed + /// stream, and set the current CUDA stream on that device to the passed + /// stream. Errors if the Stream is not a CUDA stream. + explicit OptionalCUDAStreamGuard(Stream stream) : guard_(stream) {} + + /// Set the current device to the device associated with the passed stream, + /// and set the current stream on that device to the passed stream, + /// if the passed stream is not nullopt. + explicit OptionalCUDAStreamGuard(std::optional stream_opt) + : guard_(stream_opt) {} + + /// Copy is disallowed + OptionalCUDAStreamGuard(const OptionalCUDAStreamGuard&) = delete; + OptionalCUDAStreamGuard& operator=(const OptionalCUDAStreamGuard&) = delete; + + // See Note [Move construction for RAII guards is tricky] + OptionalCUDAStreamGuard(OptionalCUDAStreamGuard&& other) = delete; + + // See Note [Move assignment for RAII guards is tricky] + OptionalCUDAStreamGuard& operator=(OptionalCUDAStreamGuard&& other) = delete; + ~OptionalCUDAStreamGuard() = default; + + /// Resets the currently set CUDA stream to the original stream and + /// the currently set device to the original device. Then, + /// set the current device to the device associated with the passed stream, + /// and set the current stream on that device to the passed stream. + /// Initializes the guard if it was not previously initialized. + void reset_stream(Stream stream) { + guard_.reset_stream(stream); + } + + /// Returns the CUDA stream that was set at the time the guard was most + /// recently initialized, or nullopt if the guard is uninitialized. + std::optional original_stream() const { + auto r = guard_.original_stream(); + if (r.has_value()) { + return CUDAStream(CUDAStream::UNCHECKED, r.value()); + } else { + return std::nullopt; + } + } + + /// Returns the most recent CUDA stream that was set using this stream guard, + /// either from construction, or via reset_stream, if the guard is + /// initialized, or nullopt if the guard is uninitialized. + std::optional current_stream() const { + auto r = guard_.current_stream(); + if (r.has_value()) { + return CUDAStream(CUDAStream::UNCHECKED, r.value()); + } else { + return std::nullopt; + } + } + + /// Restore the original CUDA device and stream, resetting this guard to + /// uninitialized state. + void reset() { + guard_.reset(); + } + + private: + c10::impl::InlineOptionalStreamGuard guard_; +}; + +/// A variant of MultiStreamGuard that is specialized for CUDA. 
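+/// On construction it makes each passed stream the current stream for its
+/// respective device, and on destruction it restores the previous current
+/// stream on each of those devices.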
+struct CUDAMultiStreamGuard {
+  explicit CUDAMultiStreamGuard(ArrayRef<CUDAStream> streams)
+      : guard_(unwrapStreams(streams)) {}
+
+  /// Copy is disallowed
+  CUDAMultiStreamGuard(const CUDAMultiStreamGuard&) = delete;
+  CUDAMultiStreamGuard& operator=(const CUDAMultiStreamGuard&) = delete;
+
+  // See Note [Move construction for RAII guards is tricky]
+  CUDAMultiStreamGuard(CUDAMultiStreamGuard&& other) = delete;
+
+  // See Note [Move assignment for RAII guards is tricky]
+  CUDAMultiStreamGuard& operator=(CUDAMultiStreamGuard&& other) = delete;
+  ~CUDAMultiStreamGuard() = default;
+
+ private:
+  c10::impl::InlineMultiStreamGuard<impl::CUDAGuardImpl> guard_;
+
+  static std::vector<Stream> unwrapStreams(ArrayRef<CUDAStream> cudaStreams) {
+    std::vector<Stream> streams;
+    streams.reserve(cudaStreams.size());
+    for (const CUDAStream& cudaStream : cudaStreams) {
+      streams.push_back(cudaStream);
+    }
+    return streams;
+  }
+};
+
+} // namespace c10::cuda
diff --git a/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAMacros.h b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAMacros.h
new file mode 100644
index 0000000000000000000000000000000000000000..3b69035c4e83fd2eca488a3a726eddd49cab4058
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAMacros.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#ifndef C10_USING_CUSTOM_GENERATED_MACROS
+
+// We have not yet modified the AMD HIP build to generate this file so
+// we add an extra option to specifically ignore it.
+#ifndef C10_CUDA_NO_CMAKE_CONFIGURE_FILE
+#include
+#endif // C10_CUDA_NO_CMAKE_CONFIGURE_FILE
+
+#endif
+
+// See c10/macros/Export.h for a detailed explanation of what the function
+// of these macros is. We need one set of macros for every separate library
+// we build.
+
+#ifdef _WIN32
+#if defined(C10_CUDA_BUILD_SHARED_LIBS)
+#define C10_CUDA_EXPORT __declspec(dllexport)
+#define C10_CUDA_IMPORT __declspec(dllimport)
+#else
+#define C10_CUDA_EXPORT
+#define C10_CUDA_IMPORT
+#endif
+#else // _WIN32
+#if defined(__GNUC__)
+#define C10_CUDA_EXPORT __attribute__((__visibility__("default")))
+#else // defined(__GNUC__)
+#define C10_CUDA_EXPORT
+#endif // defined(__GNUC__)
+#define C10_CUDA_IMPORT C10_CUDA_EXPORT
+#endif // _WIN32
+
+// This one is being used by libc10_cuda.so
+#ifdef C10_CUDA_BUILD_MAIN_LIB
+#define C10_CUDA_API C10_CUDA_EXPORT
+#else
+#define C10_CUDA_API C10_CUDA_IMPORT
+#endif
+
+/**
+ * The maximum number of GPUs that we recognize. Increasing this beyond the
+ * initial limit of 16 broke Caffe2 testing, hence the ifdef guards.
+ * This value cannot be more than 128 because our DeviceIndex is a uint8_t.
+ */
+#ifdef FBCODE_CAFFE2
+// fbcode depends on this value being 16
+#define C10_COMPILE_TIME_MAX_GPUS 16
+#else
+#define C10_COMPILE_TIME_MAX_GPUS 120
+#endif
diff --git a/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAMathCompat.h b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAMathCompat.h
new file mode 100644
index 0000000000000000000000000000000000000000..33bc5c1b40d3202207a62f500a1f3e99b670f3f0
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAMathCompat.h
@@ -0,0 +1,152 @@
+#pragma once
+
+/* This file defines math functions compatible across different gpu
+ * platforms (currently CUDA and HIP).
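+ * For example, c10::cuda::compat::exp(x) resolves to ::expf(x) when x is a
+ * float and to ::exp(x) when it is a double, so templated device code can
+ * call a single name for both precisions.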
+ */ +#if defined(__CUDACC__) || defined(__HIPCC__) + +#include +#include + +#ifdef __HIPCC__ +#define __MATH_FUNCTIONS_DECL__ inline C10_DEVICE +#else /* __HIPCC__ */ +#ifdef __CUDACC_RTC__ +#define __MATH_FUNCTIONS_DECL__ C10_HOST_DEVICE +#else /* __CUDACC_RTC__ */ +#define __MATH_FUNCTIONS_DECL__ inline C10_HOST_DEVICE +#endif /* __CUDACC_RTC__ */ +#endif /* __HIPCC__ */ + +namespace c10::cuda::compat { + +__MATH_FUNCTIONS_DECL__ float abs(float x) { + return ::fabsf(x); +} +__MATH_FUNCTIONS_DECL__ double abs(double x) { + return ::fabs(x); +} + +__MATH_FUNCTIONS_DECL__ float exp(float x) { + return ::expf(x); +} +__MATH_FUNCTIONS_DECL__ double exp(double x) { + return ::exp(x); +} + +__MATH_FUNCTIONS_DECL__ float ceil(float x) { + return ::ceilf(x); +} +__MATH_FUNCTIONS_DECL__ double ceil(double x) { + return ::ceil(x); +} + +__MATH_FUNCTIONS_DECL__ float copysign(float x, float y) { +#if defined(__CUDA_ARCH__) || defined(__HIPCC__) + return ::copysignf(x, y); +#else + // std::copysign gets ICE/Segfaults with gcc 7.5/8 on arm64 + // (e.g. Jetson), see PyTorch PR #51834 + // This host function needs to be here for the compiler but is never used + TORCH_INTERNAL_ASSERT( + false, "CUDAMathCompat copysign should not run on the CPU"); +#endif +} +__MATH_FUNCTIONS_DECL__ double copysign(double x, double y) { +#if defined(__CUDA_ARCH__) || defined(__HIPCC__) + return ::copysign(x, y); +#else + // see above + TORCH_INTERNAL_ASSERT( + false, "CUDAMathCompat copysign should not run on the CPU"); +#endif +} + +__MATH_FUNCTIONS_DECL__ float floor(float x) { + return ::floorf(x); +} +__MATH_FUNCTIONS_DECL__ double floor(double x) { + return ::floor(x); +} + +__MATH_FUNCTIONS_DECL__ float log(float x) { + return ::logf(x); +} +__MATH_FUNCTIONS_DECL__ double log(double x) { + return ::log(x); +} + +__MATH_FUNCTIONS_DECL__ float log1p(float x) { + return ::log1pf(x); +} + +__MATH_FUNCTIONS_DECL__ double log1p(double x) { + return ::log1p(x); +} + +__MATH_FUNCTIONS_DECL__ float max(float x, float y) { + return ::fmaxf(x, y); +} +__MATH_FUNCTIONS_DECL__ double max(double x, double y) { + return ::fmax(x, y); +} + +__MATH_FUNCTIONS_DECL__ float min(float x, float y) { + return ::fminf(x, y); +} +__MATH_FUNCTIONS_DECL__ double min(double x, double y) { + return ::fmin(x, y); +} + +__MATH_FUNCTIONS_DECL__ float pow(float x, float y) { + return ::powf(x, y); +} +__MATH_FUNCTIONS_DECL__ double pow(double x, double y) { + return ::pow(x, y); +} + +__MATH_FUNCTIONS_DECL__ void sincos(float x, float* sptr, float* cptr) { + return ::sincosf(x, sptr, cptr); +} +__MATH_FUNCTIONS_DECL__ void sincos(double x, double* sptr, double* cptr) { + return ::sincos(x, sptr, cptr); +} + +__MATH_FUNCTIONS_DECL__ float sqrt(float x) { + return ::sqrtf(x); +} +__MATH_FUNCTIONS_DECL__ double sqrt(double x) { + return ::sqrt(x); +} + +__MATH_FUNCTIONS_DECL__ float rsqrt(float x) { + return ::rsqrtf(x); +} +__MATH_FUNCTIONS_DECL__ double rsqrt(double x) { + return ::rsqrt(x); +} + +__MATH_FUNCTIONS_DECL__ float tan(float x) { + return ::tanf(x); +} +__MATH_FUNCTIONS_DECL__ double tan(double x) { + return ::tan(x); +} + +__MATH_FUNCTIONS_DECL__ float tanh(float x) { + return ::tanhf(x); +} +__MATH_FUNCTIONS_DECL__ double tanh(double x) { + return ::tanh(x); +} + +__MATH_FUNCTIONS_DECL__ float normcdf(float x) { + return ::normcdff(x); +} +__MATH_FUNCTIONS_DECL__ double normcdf(double x) { + return ::normcdf(x); +} + +} // namespace c10::cuda::compat + +#endif diff --git 
a/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAMiscFunctions.h b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAMiscFunctions.h new file mode 100644 index 0000000000000000000000000000000000000000..203363028fd72d01065f161832c7e6c5f6b5217d --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAMiscFunctions.h @@ -0,0 +1,12 @@ +#pragma once +// this file is to avoid circular dependency between CUDAFunctions.h and +// CUDAExceptions.h + +#include + +#include + +namespace c10::cuda { +C10_CUDA_API const char* get_cuda_check_suffix() noexcept; +C10_CUDA_API std::mutex* getFreeMutex(); +} // namespace c10::cuda diff --git a/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAStream.h b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAStream.h new file mode 100644 index 0000000000000000000000000000000000000000..0ca3ef1f1a8cfe73ba0c8ec69628c1a104715ccf --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/cuda/CUDAStream.h @@ -0,0 +1,268 @@ +#pragma once + +#include + +#include +#include +#include +#include + +/* + * Stream pool note. + * + * A CUDAStream is an abstraction of an actual cuStream on the GPU. CUDAStreams + * are backed by cuStreams, but they use several pools to minimize the costs + * associated with creating, retaining, and destroying cuStreams. + * + * There are three pools per device, and a device's pools are lazily created. + * + * The first pool contains only the default stream. When the default stream + * is requested it's returned. + * + * The second pool is the "low priority" or "default priority" streams. In + * HIP builds there is no distinction between streams in this pool and streams + * in the third pool (below). There are 32 of these streams per device, and + * when a stream is requested one of these streams is returned round-robin. + * That is, the first stream requested is at index 0, the second at index 1... + * to index 31, then index 0 again. + * + * This means that if 33 low priority streams are requested, the first and + * last streams requested are actually the same stream (under the covers) + * and kernels enqueued on them cannot run concurrently. + * + * The third pool is the "high priority" streams. The third pool acts like + * the second pool except the streams are created with a higher priority. + * + * These pools suggest that stream users should prefer many short-lived streams, + * as the cost of acquiring and releasing streams is effectively zero. If + * many longer-lived streams are required in performance critical scenarios + * then the functionality here may need to be extended to allow, for example, + * "reserving" a subset of the pool so that other streams do not accidentally + * overlap the performance critical streams. + * + * Note: although the notion of "current stream for device" is thread local + * (every OS thread has a separate current stream, as one might expect), + * the stream pool is global across all threads; stream 0 is always stream 0 + * no matter which thread you use it on. Multiple threads can synchronize + * on the same stream. Although the CUDA documentation is not very clear + * on the matter, streams are thread safe; e.g., it is safe to enqueue + * a kernel on the same stream from two different threads. + */ + +namespace c10::cuda { + +static constexpr int max_compile_time_stream_priorities = 4; + +// Value object representing a CUDA stream. 
This is just a wrapper +// around c10::Stream, but it comes with a little extra CUDA-specific +// functionality (conversion to cudaStream_t), and a guarantee that +// the wrapped c10::Stream really is a CUDA stream. +class C10_CUDA_API CUDAStream { + public: + enum Unchecked { UNCHECKED }; + + /// Construct a CUDAStream from a Stream. This construction is checked, + /// and will raise an error if the Stream is not, in fact, a CUDA stream. + explicit CUDAStream(Stream stream) : stream_(stream) { + TORCH_CHECK(stream_.device_type() == DeviceType::CUDA); + } + + /// Construct a CUDAStream from a Stream with no error checking. + /// This constructor uses the "named" constructor idiom, and can + /// be invoked as: CUDAStream(CUDAStream::UNCHECKED, stream) + explicit CUDAStream(Unchecked, Stream stream) : stream_(stream) {} + + bool operator==(const CUDAStream& other) const noexcept { + return unwrap() == other.unwrap(); + } + + bool operator!=(const CUDAStream& other) const noexcept { + return unwrap() != other.unwrap(); + } + + /// Implicit conversion to cudaStream_t. + operator cudaStream_t() const { + return stream(); + } + + /// Implicit conversion to Stream (a.k.a., forget that the stream is a + /// CUDA stream). + operator Stream() const { + return unwrap(); + } + + /// Used to avoid baking in device type explicitly to Python-side API. + DeviceType device_type() const { + return DeviceType::CUDA; + } + + /// Get the CUDA device index that this stream is associated with. + DeviceIndex device_index() const { + return stream_.device_index(); + } + + /// Get the full Device that this stream is associated with. The Device + /// is guaranteed to be a CUDA device. + Device device() const { + return Device(DeviceType::CUDA, device_index()); + } + + /// Return the stream ID corresponding to this particular stream. + StreamId id() const { + return stream_.id(); + } + + bool query() const { + DeviceGuard guard{stream_.device()}; + cudaError_t err = C10_CUDA_ERROR_HANDLED(cudaStreamQuery(stream())); + + if (err == cudaSuccess) { + return true; + } else if (err != cudaErrorNotReady) { + C10_CUDA_CHECK(err); + } else { + // ignore and clear the error if not ready + (void)cudaGetLastError(); + } + + return false; + } + + void synchronize() const { + DeviceGuard guard{stream_.device()}; + c10::cuda::stream_synchronize(stream()); + } + + int priority() const { + DeviceGuard guard{stream_.device()}; + int priority = 0; + C10_CUDA_CHECK(cudaStreamGetPriority(stream(), &priority)); + return priority; + } + + /// Explicit conversion to cudaStream_t. + cudaStream_t stream() const; + + /// Explicit conversion to Stream. + Stream unwrap() const { + return stream_; + } + + /// Reversibly pack a CUDAStream into a struct representation. + /// Previously the stream's data was packed into a single int64_t, + /// as it was assumed the fields would not require more than + /// 64 bits of storage in total. + /// See https://github.com/pytorch/pytorch/issues/75854 + /// for more information regarding newer platforms that may violate + /// this assumption. + /// + /// The CUDAStream can be unpacked using unpack(). + struct c10::StreamData3 pack3() const { + return stream_.pack3(); + } + + // Unpack a CUDAStream from the 3 fields generated by pack(). 
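+  // A round-trip sketch:
+  //   auto d = stream.pack3();
+  //   auto s = CUDAStream::unpack3(d.stream_id, d.device_index, d.device_type);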
+ static CUDAStream unpack3( + StreamId stream_id, + DeviceIndex device_index, + DeviceType device_type) { + return CUDAStream(Stream::unpack3(stream_id, device_index, device_type)); + } + + static std::tuple priority_range() { + // Note: this returns the range of priority **supported by PyTorch**, not + // the range of priority **supported by CUDA**. The former is a subset of + // the latter. + int least_priority = 0, greatest_priority = 0; + C10_CUDA_CHECK( + cudaDeviceGetStreamPriorityRange(&least_priority, &greatest_priority)); +#ifdef USE_ROCM + // See Note [HIP stream priorities] + TORCH_INTERNAL_ASSERT( + least_priority == 1, "Unexpected HIP stream priority range"); + least_priority = 0; +#else + TORCH_INTERNAL_ASSERT( + least_priority == 0, "Unexpected CUDA stream priority range"); +#endif + TORCH_INTERNAL_ASSERT( + greatest_priority <= -1, "Unexpected CUDA stream priority range"); + greatest_priority = std::max( + -c10::cuda::max_compile_time_stream_priorities + 1, greatest_priority); + return std::make_tuple(least_priority, greatest_priority); + } + + // Deleted for now; use CUDAEvent::block instead + // void synchronize_with(const CUDAEvent& event) const; + + private: + Stream stream_; +}; + +/** + * Get a new stream from the CUDA stream pool. You can think of this + * as "creating" a new stream, but no such creation actually happens; + * instead, streams are preallocated from the pool and returned in a + * round-robin fashion. + * + * You can request a stream from the high priority pool by setting + * isHighPriority to true, or a stream for a specific device by setting device + * (defaulting to the current CUDA stream.) + */ +C10_API CUDAStream +getStreamFromPool(const bool isHighPriority = false, DeviceIndex device = -1); +// no default priority to disambiguate overloads +C10_API CUDAStream +getStreamFromPool(const int priority, DeviceIndex device = -1); + +/** + * Get a CUDAStream from a externally allocated one. + * + * This is mainly for interoperability with different libraries where we + * want to operate on a non-torch allocated stream for data exchange or similar + * purposes + */ +C10_API CUDAStream +getStreamFromExternal(cudaStream_t ext_stream, DeviceIndex device_index); + +/** + * Get the default CUDA stream, for the passed CUDA device, or for the + * current device if no device index is passed. The default stream is + * where most computation occurs when you aren't explicitly using + * streams. + */ +C10_API CUDAStream getDefaultCUDAStream(DeviceIndex device_index = -1); + +/** + * Get the current CUDA stream, for the passed CUDA device, or for the + * current device if no device index is passed. The current CUDA stream + * will usually be the default CUDA stream for the device, but it may + * be different if someone called 'setCurrentCUDAStream' or used 'StreamGuard' + * or 'CUDAStreamGuard'. + */ +C10_API CUDAStream getCurrentCUDAStream(DeviceIndex device_index = -1); + +/** + * Set the current stream on the device of the passed in stream to be + * the passed in stream. Yes, you read that right: this function + * has *nothing* to do with the current device: it toggles the current + * stream of the device of the passed stream. + * + * Confused? Avoid using this function; prefer using 'CUDAStreamGuard' instead + * (which will switch both your current device and current stream in the way you + * expect, and reset it back to its original state afterwards). 
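 *
 * A sketch of the guard-based alternative (`my_stream` is illustrative):
 *
 *   c10::cuda::CUDAStreamGuard guard(my_stream);  // switches device + stream
 *   // ... launch work on my_stream ...
 *   // previous device and stream restored when guard is destroyed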
+ */ +C10_API void setCurrentCUDAStream(CUDAStream stream); + +C10_API std::ostream& operator<<(std::ostream& stream, const CUDAStream& s); + +} // namespace c10::cuda + +namespace std { +template <> +struct hash { + size_t operator()(c10::cuda::CUDAStream s) const noexcept { + return std::hash{}(s.unwrap()); + } +}; +} // namespace std diff --git a/phivenv/Lib/site-packages/torch/include/c10/cuda/driver_api.h b/phivenv/Lib/site-packages/torch/include/c10/cuda/driver_api.h new file mode 100644 index 0000000000000000000000000000000000000000..91cadfb66f3bb7f37f2352c8bfa3f37f1b117a13 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/cuda/driver_api.h @@ -0,0 +1,84 @@ +#pragma once +#include +#define NVML_NO_UNVERSIONED_FUNC_DEFS +#include + +#include + +#define C10_CUDA_DRIVER_CHECK(EXPR) \ + do { \ + CUresult __err = EXPR; \ + if (__err != CUDA_SUCCESS) { \ + const char* err_str; \ + CUresult get_error_str_err [[maybe_unused]] = \ + c10::cuda::DriverAPI::get()->cuGetErrorString_(__err, &err_str); \ + if (get_error_str_err != CUDA_SUCCESS) { \ + TORCH_CHECK(false, "CUDA driver error: unknown error"); \ + } else { \ + TORCH_CHECK(false, "CUDA driver error: ", err_str); \ + } \ + } \ + } while (0) + +// The integer in the second column specifies the requested CUDA Driver API +// version. The dynamic loader will accept a driver with a newer version, but it +// ensures that the requested symbol exists in *at least* the specified version +// or earlier. + +// Keep these requested versions as low as possible to maximize compatibility +// across different driver versions. + +// Why do we pin to an older version instead of using the latest? +// If a user installs a newer driver, blindly resolving the symbol may bind to a +// newer version of the function with different behavior, potentially breaking +// PyTorch. 
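+//
+// Each entry below expands, via the X-macro pattern, into a function-pointer
+// member of DriverAPI (declared at the bottom of this file). A hypothetical
+// call site then looks like (a sketch):
+//   const char* msg = nullptr;
+//   c10::cuda::DriverAPI::get()->cuGetErrorString_(CUDA_SUCCESS, &msg);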
+ +#define C10_LIBCUDA_DRIVER_API_REQUIRED(_) \ + _(cuDeviceGetAttribute, 12000) \ + _(cuMemAddressReserve, 12000) \ + _(cuMemRelease, 12000) \ + _(cuMemMap, 12000) \ + _(cuMemAddressFree, 12000) \ + _(cuMemSetAccess, 12000) \ + _(cuMemUnmap, 12000) \ + _(cuMemCreate, 12000) \ + _(cuMemGetAllocationGranularity, 12000) \ + _(cuMemExportToShareableHandle, 12000) \ + _(cuMemImportFromShareableHandle, 12000) \ + _(cuMemsetD32Async, 12000) \ + _(cuStreamWriteValue32, 12000) \ + _(cuGetErrorString, 12000) + +#if defined(CUDA_VERSION) && (CUDA_VERSION >= 12030) +#define C10_LIBCUDA_DRIVER_API_OPTIONAL(_) \ + _(cuMulticastAddDevice, 12030) \ + _(cuMulticastBindMem, 12030) \ + _(cuMulticastCreate, 12030) +#else +#define C10_LIBCUDA_DRIVER_API_OPTIONAL(_) +#endif + +#define C10_NVML_DRIVER_API(_) \ + _(nvmlInit_v2) \ + _(nvmlDeviceGetHandleByPciBusId_v2) \ + _(nvmlDeviceGetNvLinkRemoteDeviceType) \ + _(nvmlDeviceGetNvLinkRemotePciInfo_v2) \ + _(nvmlDeviceGetComputeRunningProcesses) \ + _(nvmlSystemGetCudaDriverVersion_v2) + +namespace c10::cuda { + +struct DriverAPI { +#define CREATE_MEMBER_VERSIONED(name, version) decltype(&name) name##_; +#define CREATE_MEMBER(name) decltype(&name) name##_; + C10_LIBCUDA_DRIVER_API_REQUIRED(CREATE_MEMBER_VERSIONED) + C10_LIBCUDA_DRIVER_API_OPTIONAL(CREATE_MEMBER_VERSIONED) + C10_NVML_DRIVER_API(CREATE_MEMBER) +#undef CREATE_MEMBER_VERSIONED +#undef CREATE_MEMBER + + static DriverAPI* get(); + static void* get_nvml_handle(); +}; + +} // namespace c10::cuda diff --git a/phivenv/Lib/site-packages/torch/include/c10/cuda/impl/CUDAGuardImpl.h b/phivenv/Lib/site-packages/torch/include/c10/cuda/impl/CUDAGuardImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..8f3a6e983cb3b92b9cdb2b94ae3e41343dfc0f73 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/cuda/impl/CUDAGuardImpl.h @@ -0,0 +1,265 @@ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace c10::cuda::impl { + +struct CUDAGuardImpl final : public c10::impl::DeviceGuardImplInterface { + static constexpr DeviceType static_type = DeviceType::CUDA; + + CUDAGuardImpl() = default; + explicit CUDAGuardImpl(DeviceType t) { + TORCH_CHECK( + t == DeviceType::CUDA, + "CUDAGuardImpl initialized with non-CUDA DeviceType: ", + t); + } + DeviceType type() const override { + return DeviceType::CUDA; + } + Device exchangeDevice(Device d) const override { + TORCH_CHECK(d.is_cuda(), "Expected a CUDA device, but got ", d); + auto old_device_index = c10::cuda::ExchangeDevice(d.index()); + return Device(DeviceType::CUDA, old_device_index); + } + Device getDevice() const override { + DeviceIndex device = 0; + C10_CUDA_CHECK(c10::cuda::GetDevice(&device)); + return Device(DeviceType::CUDA, device); + } + std::optional uncheckedGetDevice() const noexcept { + DeviceIndex device{-1}; + const auto err = C10_CUDA_ERROR_HANDLED(c10::cuda::GetDevice(&device)); + C10_CUDA_CHECK_WARN(err); + if (err != cudaSuccess) { + return std::nullopt; + } + return Device(DeviceType::CUDA, device); + } + void setDevice(Device d) const override { + TORCH_CHECK(d.is_cuda(), "Expected a CUDA device, but got ", d); + C10_CUDA_CHECK(c10::cuda::SetDevice(d.index())); + } + void uncheckedSetDevice(Device d) const noexcept override { + C10_CUDA_CHECK_WARN(c10::cuda::MaybeSetDevice(d.index())); + } + Stream getStream(Device d) const override { + return getCurrentCUDAStream(d.index()).unwrap(); + } + Stream 
getDefaultStream(Device d) const override { + return getDefaultCUDAStream(d.index()); + } + Stream getNewStream(Device d, int priority = 0) const override { + return getStreamFromPool(priority, d.index()); + } + Stream getStreamFromGlobalPool(Device d, bool isHighPriority = false) + const override { + return getStreamFromPool(isHighPriority, d.index()); + } + // NB: These do NOT set the current device + Stream exchangeStream(Stream s) const override { + CUDAStream cs(s); + auto old_stream = getCurrentCUDAStream(s.device().index()); + setCurrentCUDAStream(cs); + return old_stream.unwrap(); + } + DeviceIndex deviceCount() const noexcept override { + return device_count(); + } + + // Event-related functions + void createEvent(cudaEvent_t* cuda_event, const EventFlag flag) const { + // Maps PyTorch's Event::Flag to CUDA flag + auto cuda_flag = cudaEventDefault; + switch (flag) { + case EventFlag::PYTORCH_DEFAULT: + cuda_flag = cudaEventDisableTiming; + break; + case EventFlag::BACKEND_DEFAULT: + cuda_flag = cudaEventDefault; + break; + default: + TORCH_CHECK(false, "CUDA event received unknown flag"); + } + + C10_CUDA_CHECK(cudaEventCreateWithFlags(cuda_event, cuda_flag)); + const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); + if (C10_UNLIKELY(interp)) { + (*interp)->trace_gpu_event_creation( + c10::kCUDA, reinterpret_cast(cuda_event)); + } + } + + void destroyEvent(void* event, const DeviceIndex device_index) + const noexcept override { + if (!event) + return; + auto cuda_event = static_cast(event); + DeviceIndex orig_device{-1}; + C10_CUDA_CHECK_WARN(c10::cuda::GetDevice(&orig_device)); + C10_CUDA_CHECK_WARN(c10::cuda::SetDevice(device_index)); + const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); + if (C10_UNLIKELY(interp)) { + (*interp)->trace_gpu_event_deletion( + c10::kCUDA, reinterpret_cast(cuda_event)); + } + C10_CUDA_CHECK_WARN(cudaEventDestroy(cuda_event)); + C10_CUDA_CHECK_WARN(c10::cuda::SetDevice(orig_device)); + } + + void record( + void** event, + const Stream& stream, + const DeviceIndex device_index, + const EventFlag flag) const override { + TORCH_CHECK( + device_index == -1 || device_index == stream.device_index(), + "Event device index ", + device_index, + " does not match recording stream's device index ", + stream.device_index(), + "."); + + cudaEvent_t cuda_event = static_cast(*event); + CUDAStream cuda_stream{stream}; + + // Moves to stream's device to record + const auto orig_device = getDevice(); + setDevice(stream.device()); + + // Creates the event (lazily) + if (!cuda_event) + createEvent(&cuda_event, flag); + C10_CUDA_CHECK(cudaEventRecord(cuda_event, cuda_stream)); + // Makes the void* point to the (possibly just allocated) CUDA event + *event = cuda_event; + const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); + if (C10_UNLIKELY(interp)) { + (*interp)->trace_gpu_event_record( + c10::kCUDA, + reinterpret_cast(cuda_event), + reinterpret_cast(cuda_stream.stream())); + } + + // Resets device + setDevice(orig_device); + } + + void block(void* event, const Stream& stream) const override { + if (!event) + return; + cudaEvent_t cuda_event = static_cast(event); + CUDAStream cuda_stream{stream}; + const auto orig_device = getDevice(); + setDevice(stream.device()); + C10_CUDA_CHECK(cudaStreamWaitEvent( + cuda_stream, + cuda_event, + /*flags (must be zero)=*/0)); + const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); + if (C10_UNLIKELY(interp)) { + (*interp)->trace_gpu_event_wait( + 
c10::kCUDA, + reinterpret_cast(cuda_event), + reinterpret_cast(cuda_stream.stream())); + } + setDevice(orig_device); + } + + // May be called from any device + bool queryEvent(void* event) const override { + if (!event) + return true; + cudaEvent_t cuda_event = static_cast(event); + // Note: cudaEventQuery can be safely called from any device + const cudaError_t err = C10_CUDA_ERROR_HANDLED(cudaEventQuery(cuda_event)); + if (err != cudaErrorNotReady) { + C10_CUDA_CHECK(err); + } else { + // ignore and clear the error if not ready + (void)cudaGetLastError(); + } + return (err == cudaSuccess); + } + + // Stream-related functions + bool queryStream(const Stream& stream) const override { + CUDAStream cuda_stream{stream}; + return cuda_stream.query(); + } + + void synchronizeStream(const Stream& stream) const override { + CUDAStream cuda_stream{stream}; + cuda_stream.synchronize(); + } + + void synchronizeEvent(void* event) const override { + if (!event) + return; + cudaEvent_t cuda_event = static_cast(event); + const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); + if (C10_UNLIKELY(interp)) { + (*interp)->trace_gpu_event_synchronization( + c10::kCUDA, reinterpret_cast(cuda_event)); + } + // Note: cudaEventSynchronize can be safely called from any device + C10_CUDA_CHECK(cudaEventSynchronize(cuda_event)); + } + + // Note: synchronizeDevice can be safely called from any device + void synchronizeDevice(const c10::DeviceIndex device_index) const override { + DeviceIndex orig_device{-1}; + C10_CUDA_CHECK(c10::cuda::GetDevice(&orig_device)); + C10_CUDA_CHECK(c10::cuda::SetDevice(device_index)); + const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); + if (C10_UNLIKELY(interp)) { + (*interp)->trace_gpu_device_synchronization(c10::kCUDA); + } + C10_CUDA_CHECK(cudaDeviceSynchronize()); + C10_CUDA_CHECK(c10::cuda::SetDevice(orig_device)); + } + + void recordDataPtrOnStream(const c10::DataPtr& data_ptr, const Stream& stream) + const override { + CUDAStream cuda_stream{stream}; + CUDACachingAllocator::recordStream(data_ptr, cuda_stream); + } + + double elapsedTime(void* event1, void* event2, const DeviceIndex device_index) + const override { + TORCH_CHECK( + event1 && event2, + "Both events must be recorded before calculating elapsed time."); + // Even though cudaEventElapsedTime can be safely called from any device, if + // the current device is not initialized, it will create a new cuda context, + // which will consume a lot of memory. 
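+    // To avoid that, temporarily switch to `device_index` while querying the
+    // events, then restore the original device below.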
+    DeviceIndex orig_device{-1};
+    C10_CUDA_CHECK(c10::cuda::GetDevice(&orig_device));
+    C10_CUDA_CHECK(c10::cuda::SetDevice(device_index));
+    cudaEvent_t cuda_event1 = static_cast<cudaEvent_t>(event1);
+    cudaEvent_t cuda_event2 = static_cast<cudaEvent_t>(event2);
+    float time_ms = 0;
+    // raise cudaErrorNotReady if either event is recorded but not yet completed
+    C10_CUDA_CHECK(cudaEventElapsedTime(&time_ms, cuda_event1, cuda_event2));
+    C10_CUDA_CHECK(c10::cuda::SetDevice(orig_device));
+    return static_cast<double>(time_ms);
+  }
+};
+
+} // namespace c10::cuda::impl
diff --git a/phivenv/Lib/site-packages/torch/include/c10/cuda/impl/CUDATest.h b/phivenv/Lib/site-packages/torch/include/c10/cuda/impl/CUDATest.h
new file mode 100644
index 0000000000000000000000000000000000000000..9b288250b5cec90a2540be3b5cae5af2ae965994
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/c10/cuda/impl/CUDATest.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <c10/cuda/CUDAMacros.h>
+
+namespace c10::cuda::impl {
+
+C10_CUDA_API int c10_cuda_test();
+
+}
diff --git a/phivenv/Lib/site-packages/torch/include/c10/macros/Export.h b/phivenv/Lib/site-packages/torch/include/c10/macros/Export.h
new file mode 100644
index 0000000000000000000000000000000000000000..63ccb0ef20a7fc0271a0a0aa5d8c7e1ff28128e8
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/c10/macros/Export.h
@@ -0,0 +1,78 @@
+#ifndef C10_MACROS_EXPORT_H_
+#define C10_MACROS_EXPORT_H_
+
+#ifndef C10_USING_CUSTOM_GENERATED_MACROS
+#include <c10/macros/cmake_macros.h>
+#endif // C10_USING_CUSTOM_GENERATED_MACROS
+
+#include
+
+// This one is being used by libtorch.so
+#ifdef CAFFE2_BUILD_MAIN_LIB
+#define TORCH_API C10_EXPORT
+#else
+#define TORCH_API C10_IMPORT
+#endif
+
+// You may be wondering: Whose brilliant idea was it to split torch_cuda into
+// two pieces with confusing names?
+// Once upon a time, there _was_ only TORCH_CUDA_API. All was happy until we
+// tried to compile PyTorch for CUDA 11.1, which ran into relocation marker
+// issues when linking big binaries.
+// (https://github.com/pytorch/pytorch/issues/39968) We had two choices:
+// (1) Stop supporting so many GPU architectures
+// (2) Do something else
+// We chose #2 and decided to split the behemoth that was torch_cuda into two
+// smaller libraries, one with most of the core kernel functions (torch_cuda_cu)
+// and the other that had..well..everything else (torch_cuda_cpp). The idea was
+// this: instead of linking our static libraries (like the hefty
+// libcudnn_static.a) with another huge library, torch_cuda, and run into pesky
+// relocation marker issues, we could link our static libraries to a smaller
+// part of torch_cuda (torch_cuda_cpp) and avoid the issues.
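+// As a usage sketch (the function below is hypothetical, not part of this
+// header): a symbol living in torch_cuda_cpp would be declared as
+//
+//   TORCH_CUDA_CPP_API void some_cuda_cpp_helper();
+//
+// When torch_cuda_cpp itself is being built, TORCH_CUDA_CPP_BUILD_MAIN_LIB is
+// defined and the macro (defined just below) resolves to C10_EXPORT; every
+// consumer instead sees C10_IMPORT, which is what lets the symbol be used
+// across the shared-library boundary.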
+ +// libtorch_cuda_cu.so +#ifdef TORCH_CUDA_CU_BUILD_MAIN_LIB +#define TORCH_CUDA_CU_API C10_EXPORT +#elif defined(BUILD_SPLIT_CUDA) +#define TORCH_CUDA_CU_API C10_IMPORT +#endif + +// libtorch_cuda_cpp.so +#ifdef TORCH_CUDA_CPP_BUILD_MAIN_LIB +#define TORCH_CUDA_CPP_API C10_EXPORT +#elif defined(BUILD_SPLIT_CUDA) +#define TORCH_CUDA_CPP_API C10_IMPORT +#endif + +// libtorch_cuda.so (where torch_cuda_cu and torch_cuda_cpp are a part of the +// same api) +#ifdef TORCH_CUDA_BUILD_MAIN_LIB +#define TORCH_CUDA_CPP_API C10_EXPORT +#define TORCH_CUDA_CU_API C10_EXPORT +#elif !defined(BUILD_SPLIT_CUDA) +#define TORCH_CUDA_CPP_API C10_IMPORT +#define TORCH_CUDA_CU_API C10_IMPORT +#endif + +#if defined(TORCH_HIP_BUILD_MAIN_LIB) +#define TORCH_HIP_CPP_API C10_EXPORT +#define TORCH_HIP_API C10_EXPORT +#else +#define TORCH_HIP_CPP_API C10_IMPORT +#define TORCH_HIP_API C10_IMPORT +#endif + +#if defined(TORCH_XPU_BUILD_MAIN_LIB) +#define TORCH_XPU_API C10_EXPORT +#else +#define TORCH_XPU_API C10_IMPORT +#endif + +// Enums only need to be exported on windows for non-CUDA files +#if defined(_WIN32) && defined(__CUDACC__) +#define C10_API_ENUM C10_API +#else +#define C10_API_ENUM +#endif + +#endif // C10_MACROS_EXPORT_H_ diff --git a/phivenv/Lib/site-packages/torch/include/c10/macros/Macros.h b/phivenv/Lib/site-packages/torch/include/c10/macros/Macros.h new file mode 100644 index 0000000000000000000000000000000000000000..fb03fe8e9dbd330981831a9f242ed2f243e0b6da --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/macros/Macros.h @@ -0,0 +1,534 @@ +#ifndef C10_MACROS_MACROS_H_ +#define C10_MACROS_MACROS_H_ +#include + +/* Main entry for c10/macros. + * + * In your code, include c10/macros/Macros.h directly, instead of individual + * files in this folder. + */ + +// For build systems that do not directly depend on CMake and directly build +// from the source directory (such as Buck), one may not have a cmake_macros.h +// file at all. In this case, the build system is responsible for providing +// correct macro definitions corresponding to the cmake_macros.h.in file. +// +// In such scenarios, one should define the macro +// C10_USING_CUSTOM_GENERATED_MACROS +// to inform this header that it does not need to include the cmake_macros.h +// file. 
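+// For example, a build that sets C10_USING_CUSTOM_GENERATED_MACROS could pass
+// the equivalent definitions on the compiler command line (an illustrative
+// sketch, not a documented flag set):
+//
+//   -DC10_USING_CUSTOM_GENERATED_MACROS -DC10_BUILD_SHARED_LIBS
+//
+// where C10_BUILD_SHARED_LIBS is the same macro the generated
+// c10/macros/cmake_macros.h would otherwise provide.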
+ +#ifndef C10_USING_CUSTOM_GENERATED_MACROS +#include +#endif // C10_USING_CUSTOM_GENERATED_MACROS + +#include + +#if defined(__clang__) +#define __ubsan_ignore_float_divide_by_zero__ \ + __attribute__((no_sanitize("float-divide-by-zero"))) +#define __ubsan_ignore_undefined__ __attribute__((no_sanitize("undefined"))) +#define __ubsan_ignore_signed_int_overflow__ \ + __attribute__((no_sanitize("signed-integer-overflow"))) +#define __ubsan_ignore_pointer_overflow__ \ + __attribute__((no_sanitize("pointer-overflow"))) +#define __ubsan_ignore_function__ __attribute__((no_sanitize("function"))) +#define __ubsan_ignore_float_cast_overflow__ \ + __attribute__((no_sanitize("float-cast-overflow"))) +#else +#define __ubsan_ignore_float_divide_by_zero__ +#define __ubsan_ignore_undefined__ +#define __ubsan_ignore_signed_int_overflow__ +#define __ubsan_ignore_pointer_overflow__ +#define __ubsan_ignore_function__ +#define __ubsan_ignore_float_cast_overflow__ +#endif + +// Detect address sanitizer as some stuff doesn't work with it +#undef C10_ASAN_ENABLED + +// for clang +#if defined(__has_feature) +#if ((__has_feature(address_sanitizer))) +#define C10_ASAN_ENABLED 1 +#endif +#endif + +// for gcc +#if defined(__SANITIZE_ADDRESS__) +#if __SANITIZE_ADDRESS__ +#if !defined(C10_ASAN_ENABLED) +#define C10_ASAN_ENABLED 1 +#endif +#endif +#endif + +#if !defined(C10_ASAN_ENABLED) +#define C10_ASAN_ENABLED 0 +#endif + +// Detect undefined-behavior sanitizer (UBSAN) +#undef C10_UBSAN_ENABLED + +// for clang or gcc >= 14 +// NB: gcc 14 adds support for Clang's __has_feature +// https://gcc.gnu.org/gcc-14/changes.html +// gcc < 14 doesn't have a macro for UBSAN +// (e.g. __SANITIZE_UNDEFINED__ does not exist in gcc) +// https://github.com/google/sanitizers/issues/765 +#if defined(__has_feature) +#if ((__has_feature(undefined_behavior_sanitizer))) +#define C10_UBSAN_ENABLED 1 +#endif +#endif + +#if !defined(C10_UBSAN_ENABLED) +#define C10_UBSAN_ENABLED 0 +#endif + +// Disable the copy and assignment operator for a class. Note that this will +// disable the usage of the class in std containers. +#define C10_DISABLE_COPY_AND_ASSIGN(classname) \ + classname(const classname&) = delete; \ + classname& operator=(const classname&) = delete + +#define C10_CONCATENATE_IMPL(s1, s2) s1##s2 +#define C10_CONCATENATE(s1, s2) C10_CONCATENATE_IMPL(s1, s2) + +#define C10_MACRO_EXPAND(args) args + +#define C10_STRINGIZE_IMPL(x) #x +#define C10_STRINGIZE(x) C10_STRINGIZE_IMPL(x) + +/** + * C10_ANONYMOUS_VARIABLE(str) introduces a new identifier which starts with + * str and ends with a unique number. + */ +#ifdef __COUNTER__ +#define C10_UID __COUNTER__ +#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __COUNTER__) +#else +#define C10_UID __LINE__ +#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __LINE__) +#endif + +#ifdef __has_cpp_attribute +#define C10_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) +#else +#define C10_HAS_CPP_ATTRIBUTE(x) (0) +#endif + +#ifndef FBCODE_CAFFE2 +/// DEPRECATED: Warn if a type or return value is discarded. +#define C10_NODISCARD [[nodiscard]] + +/// DEPRECATED: Suppress an unused variable. +#define C10_UNUSED [[maybe_unused]] +#endif + +#if !defined(__has_attribute) +#define __has_attribute(x) 0 +#endif + +// Direct port of LLVM_ATTRIBUTE_USED. 
+#if __has_attribute(used) +#define C10_USED __attribute__((__used__)) +#else +#define C10_USED +#endif + +#define C10_RESTRICT __restrict + +// Simply define the namespace, in case a dependent library want to refer to +// the c10 namespace but not any nontrivial files. +namespace c10 {} +namespace c10::cuda {} +namespace c10::hip {} +namespace c10::xpu {} + +// Since C10 is the core library for caffe2 (and aten), we will simply reroute +// all abstractions defined in c10 to be available in caffe2 as well. +// This is only for backwards compatibility. Please use the symbols from the +// c10 namespace where possible. +namespace caffe2 { +using namespace c10; +} +namespace at { +using namespace c10; +} +namespace at::cuda { +using namespace c10::cuda; +} // namespace at::cuda + +// WARNING!!! THIS IS A GIANT HACK!!! +// This line means you cannot simultaneously include c10/hip +// and c10/cuda and then use them from the at::cuda namespace. +// This is true in practice, because HIPIFY works inplace on +// files in ATen/cuda, so it assumes that c10::hip is available +// from at::cuda. This namespace makes that happen. When +// HIPIFY is no longer out-of-place, we can switch the cuda +// here to hip and everyone is happy. +namespace at::cuda { +using namespace c10::hip; +} // namespace at::cuda + +namespace at::xpu { +using namespace c10::xpu; +} // namespace at::xpu + +// C10_LIKELY/C10_UNLIKELY +// +// These macros provide parentheses, so you can use these macros as: +// +// if C10_LIKELY(some_expr) { +// ... +// } +// +// NB: static_cast to boolean is mandatory in C++, because __builtin_expect +// takes a long argument, which means you may trigger the wrong conversion +// without it. +// +#if defined(__GNUC__) || defined(__ICL) || defined(__clang__) +#define C10_LIKELY(expr) (__builtin_expect(static_cast(expr), 1)) +#define C10_UNLIKELY(expr) (__builtin_expect(static_cast(expr), 0)) +#else +#define C10_LIKELY(expr) (expr) +#define C10_UNLIKELY(expr) (expr) +#endif + +/// C10_NOINLINE - Functions whose declaration is annotated with this will not +/// be inlined. +#ifdef __GNUC__ +#define C10_NOINLINE __attribute__((noinline)) +#elif _MSC_VER +#define C10_NOINLINE __declspec(noinline) +#else +#define C10_NOINLINE +#endif + +#if defined(_MSC_VER) +#define C10_ALWAYS_INLINE __forceinline +#elif __has_attribute(always_inline) || defined(__GNUC__) +#define C10_ALWAYS_INLINE __attribute__((__always_inline__)) inline +#else +#define C10_ALWAYS_INLINE inline +#endif + +// Unlike C10_ALWAYS_INLINE, C10_ALWAYS_INLINE_ATTRIBUTE can be used +// on a lambda. +#if defined(_MSC_VER) +// MSVC 14.39 is reasonably recent and doesn't like +// [[msvc::forceinline]] on a lambda, so don't try to use it. +#define C10_ALWAYS_INLINE_ATTRIBUTE +#elif __has_attribute(always_inline) || defined(__GNUC__) +#define C10_ALWAYS_INLINE_ATTRIBUTE __attribute__((__always_inline__)) +#else +#define C10_ALWAYS_INLINE_ATTRIBUTE +#endif + +#if defined(_MSC_VER) +#define C10_ATTR_VISIBILITY_HIDDEN +#elif defined(__GNUC__) +#define C10_ATTR_VISIBILITY_HIDDEN __attribute__((__visibility__("hidden"))) +#else +#define C10_ATTR_VISIBILITY_HIDDEN +#endif + +#define C10_ERASE C10_ALWAYS_INLINE C10_ATTR_VISIBILITY_HIDDEN + +#include + +#ifdef __HIPCC__ +// Unlike CUDA, HIP requires a HIP header to be included for __host__ to work. +// We do this #include here so that C10_HOST_DEVICE and friends will Just Work. 
+// See https://github.com/ROCm/hip/issues/441 +#include +#endif + +#if defined(__CUDACC__) || defined(__HIPCC__) +// Designates functions callable from the host (CPU) and the device (GPU) +#define C10_HOST_DEVICE __host__ __device__ +#define C10_DEVICE __device__ +#define C10_HOST __host__ +// constants from +// (https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications) +// The maximum number of threads per multiprocessor is 1024 for Turing +// architecture (7.5), 1536 for Geforce Ampere (8.6)/Jetson Orin (8.7), and +// 2048 for all other architectures. You'll get warnings if you exceed these +// constants. Hence, the following macros adjust the input values from the user +// to resolve potential warnings. +#if __CUDA_ARCH__ == 750 +constexpr uint32_t CUDA_MAX_THREADS_PER_SM = 1024; +#elif __CUDA_ARCH__ == 860 || __CUDA_ARCH__ == 870 || __CUDA_ARCH__ == 890 +constexpr uint32_t CUDA_MAX_THREADS_PER_SM = 1536; +#else +constexpr uint32_t CUDA_MAX_THREADS_PER_SM = 2048; +#endif +// CUDA_MAX_THREADS_PER_BLOCK is same for all architectures currently +constexpr uint32_t CUDA_MAX_THREADS_PER_BLOCK = 1024; +// CUDA_THREADS_PER_BLOCK_FALLBACK is the "canonical fallback" choice of block +// size. 256 is a good number for this fallback and should give good occupancy +// and versatility across all architectures. +constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256; +// NOTE: if you are thinking of constexpr-ify the inputs to launch bounds, it +// turns out that although __launch_bounds__ can take constexpr, it +// can't take a constexpr that has anything to do with templates. +// Currently we use launch_bounds that depend on template arguments in +// Loops.cuh, Reduce.cuh and LossCTC.cuh. Hence, C10_MAX_THREADS_PER_BLOCK +// and C10_MIN_BLOCKS_PER_SM are kept as macros. +// Suppose you were planning to write __launch_bounds__(a, b), based on your +// performance tuning on a modern GPU. Instead, you should write +// __launch_bounds__(C10_MAX_THREADS_PER_BLOCK(a), C10_MIN_BLOCKS_PER_SM(a, b)), +// which will also properly respect limits on old architectures. +#define C10_MAX_THREADS_PER_BLOCK(val) \ + (((val) <= CUDA_MAX_THREADS_PER_BLOCK) ? (val) \ + : CUDA_THREADS_PER_BLOCK_FALLBACK) +#define C10_MIN_BLOCKS_PER_SM(threads_per_block, blocks_per_sm) \ + ((((threads_per_block) * (blocks_per_sm) <= CUDA_MAX_THREADS_PER_SM) \ + ? (blocks_per_sm) \ + : ((CUDA_MAX_THREADS_PER_SM + (threads_per_block) - 1) / \ + (threads_per_block)))) +// C10_LAUNCH_BOUNDS is analogous to __launch_bounds__ +#define C10_LAUNCH_BOUNDS_0 \ + __launch_bounds__( \ + 256, 4) // default launch bounds that should give good occupancy and + // versatility across all architectures. 
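+// Worked example (numbers only, nothing new defined here): suppose a kernel
+// was tuned with __launch_bounds__(512, 4). On an 8.6 device,
+// CUDA_MAX_THREADS_PER_SM is 1536, so C10_MAX_THREADS_PER_BLOCK(512) stays 512
+// while C10_MIN_BLOCKS_PER_SM(512, 4) clamps to (1536 + 511) / 512 = 3 blocks
+// per SM; on a 2048-thread architecture both values pass through unchanged.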
+#define C10_LAUNCH_BOUNDS_1(max_threads_per_block) \ + __launch_bounds__((C10_MAX_THREADS_PER_BLOCK((max_threads_per_block)))) +#define C10_LAUNCH_BOUNDS_2(max_threads_per_block, min_blocks_per_sm) \ + __launch_bounds__( \ + (C10_MAX_THREADS_PER_BLOCK((max_threads_per_block))), \ + (C10_MIN_BLOCKS_PER_SM((max_threads_per_block), (min_blocks_per_sm)))) +#else +#define C10_HOST_DEVICE +#define C10_HOST +#define C10_DEVICE +#endif + +#if defined(USE_ROCM) +#define C10_HIP_HOST_DEVICE __host__ __device__ +#else +#define C10_HIP_HOST_DEVICE +#endif + +#if defined(USE_ROCM) +#define C10_WARP_SIZE warpSize // = 64 or 32 (Defined in hip_runtime.h) +#else +#define C10_WARP_SIZE 32 +#endif + +#if defined(_MSC_VER) && _MSC_VER <= 1900 +#define __func__ __FUNCTION__ +#endif + +// CUDA_KERNEL_ASSERT checks the assertion +// even when NDEBUG is defined. This is useful for important assertions in CUDA +// code that would otherwise be suppressed when building Release. +#if defined(__ANDROID__) || defined(__APPLE__) || defined(__FreeBSD__) +// Those platforms do not support assert() +#define CUDA_KERNEL_ASSERT(cond) +#define CUDA_KERNEL_ASSERT_MSG(cond, msg) +#define SYCL_KERNEL_ASSERT(cond) +#elif defined(_MSC_VER) +#if defined(NDEBUG) +extern "C" { +C10_IMPORT +#if defined(__SYCL_DEVICE_ONLY__) +extern SYCL_EXTERNAL void _wassert( + const wchar_t* wexpr, + const wchar_t* wfile, + unsigned line); +#else +#if defined(__CUDA_ARCH__) +__host__ __device__ +#endif // __CUDA_ARCH__ + void + _wassert(wchar_t const* _Message, wchar_t const* _File, unsigned _Line); +#endif // __SYCL_DEVICE_ONLY__ +} +#endif // NDEBUG +#define CUDA_KERNEL_ASSERT(cond) \ + if (C10_UNLIKELY(!(cond))) { \ + (void)(_wassert( \ + _CRT_WIDE(#cond), \ + _CRT_WIDE(__FILE__), \ + static_cast(__LINE__)), \ + 0); \ + } +// TODO: This doesn't assert the message because I (chilli) couldn't figure out +// a nice way to convert a char* to a wchar_t* +#define CUDA_KERNEL_ASSERT_MSG(cond, msg) \ + if (C10_UNLIKELY(!(cond))) { \ + (void)(_wassert( \ + _CRT_WIDE(#cond), \ + _CRT_WIDE(__FILE__), \ + static_cast(__LINE__)), \ + 0); \ + } +#define SYCL_KERNEL_ASSERT(cond) \ + if (C10_UNLIKELY(!(cond))) { \ + (void)(_wassert( \ + _CRT_WIDE(#cond), \ + _CRT_WIDE(__FILE__), \ + static_cast(__LINE__)), \ + 0); \ + } +#else // __APPLE__, _MSC_VER +#if defined(NDEBUG) +extern "C" { +#if defined(__SYCL_DEVICE_ONLY__) +extern SYCL_EXTERNAL void __assert_fail( + const char* expr, + const char* file, + unsigned int line, + const char* func); +#else // __SYCL_DEVICE_ONLY__ +#if (defined(__CUDA_ARCH__) && !(defined(__clang__) && defined(__CUDA__))) +// CUDA supports __assert_fail function which are common for both device +// and host side code. +__host__ __device__ +#endif + + // This forward declaration matching the declaration of __assert_fail + // exactly how it is in glibc in case parts of the program are compiled with + // different NDEBUG settings. Otherwise we might get 'ambiguous declaration' + // error. Note: On ROCm - this declaration serves for host side compilation. + void + __assert_fail( + const char* assertion, + const char* file, + unsigned int line, + const char* function) noexcept __attribute__((__noreturn__)); + +#endif // __SYCL_DEVICE_ONLY__ +} +#endif // NDEBUG +// ROCm disables kernel assert by default for performance considerations. +// Though ROCm supports __assert_fail, it uses kernel printf which has +// a non-negligible performance impact even if the assert condition is +// never triggered. 
We choose to use abort() instead which will still +// terminate the application but without a more useful error message. +#if !defined(C10_USE_ROCM_KERNEL_ASSERT) and defined(USE_ROCM) +#define CUDA_KERNEL_ASSERT(cond) \ + if C10_UNLIKELY (!(cond)) { \ + abort(); \ + } +#define CUDA_KERNEL_ASSERT_MSG(cond, msg) \ + if C10_UNLIKELY (!(cond)) { \ + abort(); \ + } +#define SYCL_KERNEL_ASSERT(cond) \ + if C10_UNLIKELY (!(cond)) { \ + abort(); \ + } +#else +#define CUDA_KERNEL_ASSERT(cond) \ + if (C10_UNLIKELY(!(cond))) { \ + __assert_fail( \ + #cond, __FILE__, static_cast(__LINE__), __func__); \ + } +#define CUDA_KERNEL_ASSERT_MSG(cond, msg) \ + if (C10_UNLIKELY(!(cond))) { \ + __assert_fail( \ + msg, __FILE__, static_cast(__LINE__), __func__); \ + } +#define SYCL_KERNEL_ASSERT(cond) \ + if (C10_UNLIKELY(!(cond))) { \ + __assert_fail( \ + #cond, __FILE__, static_cast(__LINE__), __func__); \ + } +#endif // C10_USE_ROCM_KERNEL_ASSERT and USE_ROCM +#endif // __APPLE__ + +#ifdef __APPLE__ +#include +#endif + +#if defined(__ANDROID__) +#define C10_ANDROID 1 +#define C10_MOBILE 1 +#elif ( \ + defined(__APPLE__) && \ + (TARGET_IPHONE_SIMULATOR || TARGET_OS_SIMULATOR || TARGET_OS_IPHONE)) +#define C10_IOS 1 +#define C10_MOBILE 1 +#endif // ANDROID / IOS + +#if defined(C10_MOBILE) && C10_MOBILE +#define C10_ALWAYS_INLINE_UNLESS_MOBILE inline +#else +#define C10_ALWAYS_INLINE_UNLESS_MOBILE C10_ALWAYS_INLINE +#endif + +#if !defined(FBCODE_CAFFE2) && !defined(C10_NODEPRECATED) +#define CONSTEXPR_EXCEPT_WIN_CUDA constexpr +#define C10_HOST_CONSTEXPR_EXCEPT_WIN_CUDA constexpr + +#define STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(field, val) \ + static constexpr const char field[] = val; +#define STATIC_CONST_STR_OUT_OF_LINE_FOR_WIN_CUDA(cls, field, val) +#endif // !defined(FBCODE_CAFFE2) && !defined(C10_NODEPRECATED) + +#ifndef HAS_DEMANGLE +#if defined(__ANDROID__) || defined(_WIN32) || defined(__EMSCRIPTEN__) +#define HAS_DEMANGLE 0 +#elif defined(__APPLE__) && \ + (TARGET_IPHONE_SIMULATOR || TARGET_OS_SIMULATOR || TARGET_OS_IPHONE) +#define HAS_DEMANGLE 0 +#else +#define HAS_DEMANGLE 1 +#endif +#endif // HAS_DEMANGLE + +#define _C10_PRAGMA__(string) _Pragma(#string) +#define _C10_PRAGMA_(string) _C10_PRAGMA__(string) + +#ifdef __clang__ +#define C10_CLANG_DIAGNOSTIC_PUSH() _Pragma("clang diagnostic push") +#define C10_CLANG_DIAGNOSTIC_POP() _Pragma("clang diagnostic pop") +#define C10_CLANG_DIAGNOSTIC_IGNORE(flag) \ + _C10_PRAGMA_(clang diagnostic ignored flag) +#define C10_CLANG_HAS_WARNING(flag) __has_warning(flag) +#else +#define C10_CLANG_DIAGNOSTIC_PUSH() +#define C10_CLANG_DIAGNOSTIC_POP() +#define C10_CLANG_DIAGNOSTIC_IGNORE(flag) +#define C10_CLANG_HAS_WARNING(flag) 0 +#endif + +#ifdef __clang__ + +#define C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(warning) \ + _C10_PRAGMA_(clang diagnostic push) \ + _C10_PRAGMA_(clang diagnostic ignored "-Wunknown-warning-option") \ + _C10_PRAGMA_(clang diagnostic ignored warning) + +#define C10_DIAGNOSTIC_POP() _C10_PRAGMA_(clang diagnostic pop) + +#elif __GNUC__ + +#define C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(warning) \ + _C10_PRAGMA_(GCC diagnostic push) \ + _C10_PRAGMA_(GCC diagnostic ignored "-Wpragmas") \ + _C10_PRAGMA_(GCC diagnostic ignored warning) + +#define C10_DIAGNOSTIC_POP() _C10_PRAGMA_(GCC diagnostic pop) + +#else + +#define C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(warning) +#define C10_DIAGNOSTIC_POP() + +#endif + +// This macro is used to find older C++ compilers +// that don't support move optimization for return values. 
+ +#if (defined(__GNUC__) && __GNUC__ < 13) || \ + (defined(__clang_major__) && __clang_major__ < 13) +#define C10_RETURN_MOVE_IF_OLD_COMPILER 1 +#else +#define C10_RETURN_MOVE_IF_OLD_COMPILER 0 +#endif + +#endif // C10_MACROS_MACROS_H_ diff --git a/phivenv/Lib/site-packages/torch/include/c10/macros/cmake_macros.h b/phivenv/Lib/site-packages/torch/include/c10/macros/cmake_macros.h new file mode 100644 index 0000000000000000000000000000000000000000..45bba88997cc9f1645a27c82e63fa51ad6bf3ddd --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/macros/cmake_macros.h @@ -0,0 +1,14 @@ +#ifndef C10_MACROS_CMAKE_MACROS_H_ +#define C10_MACROS_CMAKE_MACROS_H_ + +// Automatically generated header file for the C10 library. +// Do not include this file directly. Instead, include c10/macros/Macros.h. + +#define C10_BUILD_SHARED_LIBS +/* #undef C10_USE_GLOG */ +/* #undef C10_USE_GFLAGS */ +/* #undef C10_USE_NUMA */ +/* #undef C10_USE_MSVC_STATIC_RUNTIME */ +/* #undef C10_USE_ROCM_KERNEL_ASSERT */ + +#endif // C10_MACROS_CMAKE_MACROS_H_ diff --git a/phivenv/Lib/site-packages/torch/include/c10/metal/atomic.h b/phivenv/Lib/site-packages/torch/include/c10/metal/atomic.h new file mode 100644 index 0000000000000000000000000000000000000000..859f05e125b5fabecd062de29211b1e2b4c8e531 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/metal/atomic.h @@ -0,0 +1,105 @@ +#pragma once +#include +namespace c10 { +namespace metal { + +// Atomic operations helper +template +struct AtomicType {}; +template +using AtomicType_t = typename AtomicType::type; + +template <> +struct AtomicType { + using type = ::metal::atomic; + static inline void atomic_add(device type* data, long offset, float value) { + ::metal::atomic_fetch_add_explicit( + data + offset, value, ::metal::memory_order_relaxed); + } +}; + +template <> +struct AtomicType { + using type = ::metal::atomic; + static inline void atomic_add(device type* data, long offset, int value) { + ::metal::atomic_fetch_add_explicit( + data + offset, value, ::metal::memory_order_relaxed); + } +}; + +// As of Metal3.2 atomic operations are not supported on half-precision floats, +// so they must be simulated Using atomic compare and exchange over 32-bit +// atomic type +template +static inline void atomic_add_helper( + device ::metal::atomic* data, + long offset, + T value) { + auto ptr = data + (offset >> 1); + auto old = ::metal::atomic_load_explicit(ptr, ::metal::memory_order_relaxed); + union { + uint i; + T t[2]; + } val; + do { + val.i = old; + val.t[offset & 1] += value; + } while (!::metal::atomic_compare_exchange_weak_explicit( + ptr, + &old, + val.i, + ::metal::memory_order_relaxed, + ::metal::memory_order_relaxed)); +} + +template <> +struct AtomicType { + using type = ::metal::atomic; + static inline void atomic_add(device type* data, long offset, half value) { + atomic_add_helper(data, offset, value); + } +}; + +#if __METAL_VERSION__ >= 310 +template <> +struct AtomicType { + using type = ::metal::atomic; + static inline void atomic_add(device type* data, long offset, bfloat value) { + atomic_add_helper(data, offset, value); + } +}; +#endif + +// Metal supports atomic_store_explicit for bools, but +// sizeof(::metal::atomic_bool) is 4 Therefore it could not be used to +// atomically modify unaligned memory, so fall back to compare and exchange +// trick As accumulation over booleans are just or operation, do nothing if +// value is false +template <> +struct AtomicType { + using type = ::metal::atomic; + static inline void atomic_add(device 
type* data, long offset, bool value) { + if (!value) { + return; + } + auto ptr = data + (offset >> 2); + auto old = + ::metal::atomic_load_explicit(ptr, ::metal::memory_order_relaxed); + union { + uint i; + bool t[4]; + } val; + do { + val.i = old; + val.t[offset & 3] = true; + } while (!::metal::atomic_compare_exchange_weak_explicit( + ptr, + &old, + val.i, + ::metal::memory_order_relaxed, + ::metal::memory_order_relaxed)); + } +}; + +} // namespace metal +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/metal/common.h b/phivenv/Lib/site-packages/torch/include/c10/metal/common.h new file mode 100644 index 0000000000000000000000000000000000000000..c783c495d52a696e1a01d9c42abb67614f8fc4a0 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/metal/common.h @@ -0,0 +1,48 @@ +#pragma once +// Set of global constants that could be shareable between CPU and Metal code + +#ifdef __METAL__ +#define C10_METAL_CONSTEXPR constant constexpr +#else +#define C10_METAL_CONSTEXPR constexpr +#endif + +#if !defined(__METAL__) || __METAL_VERSION__ >= 310 +#define C10_METAL_ALL_TYPES_FUNCTOR(_) \ + _(Byte, 0) \ + _(Char, 1) \ + _(Short, 2) \ + _(Int, 3) \ + _(Long, 4) \ + _(Half, 5) \ + _(Float, 6) \ + _(ComplexHalf, 8) \ + _(ComplexFloat, 9) \ + _(Bool, 11) \ + _(BFloat16, 15) +#else +#define C10_METAL_ALL_TYPES_FUNCTOR(_) \ + _(Byte, 0) \ + _(Char, 1) \ + _(Short, 2) \ + _(Int, 3) \ + _(Long, 4) \ + _(Half, 5) \ + _(Float, 6) \ + _(ComplexHalf, 8) \ + _(ComplexFloat, 9) \ + _(Bool, 11) +#endif + +namespace c10 { +namespace metal { +C10_METAL_CONSTEXPR unsigned max_ndim = 16; + +enum class ScalarType { +#define _DEFINE_ENUM_VAL_(_v, _n) _v = _n, + C10_METAL_ALL_TYPES_FUNCTOR(_DEFINE_ENUM_VAL_) +#undef _DEFINE_ENUM_VAL_ +}; + +} // namespace metal +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/metal/expm1f.h b/phivenv/Lib/site-packages/torch/include/c10/metal/expm1f.h new file mode 100644 index 0000000000000000000000000000000000000000..9b96b2e1f861a241528eb98ddbe3a24750b7d100 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/metal/expm1f.h @@ -0,0 +1,97 @@ +// Copy-and-pasted from: +// https://github.com/ml-explore/mlx/blob/99c33d011d63174f50cea37c3eede002958be6d3/mlx/backend/metal/kernels/expm1f.h + +#pragma once + +#include + +// Original license copied below: +// Copyright (c) 2015-2023 Norbert Juffa +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +namespace c10 { +namespace metal { + +/* Compute exponential base e minus 1. Maximum ulp error = 0.997458 + + i = rint(a/log(2)), f = a-i*log(2). Then expm1(a) = 2**i * (expm1(f)+1) - 1. + Compute r = expm1(f). Then expm1(a)= 2 * (0.5 * 2**i * r + 0.5 * 2**i - 0.5). + With t = 0.5*2**i, expm1(a) = 2*(r * t + t-0.5). However, for best accuracy, + when i == 1, expm1(a)= 2*(r + 0.5), and when i == 0, expm1(a) = r. + + NOTE: Scale factor b is only applied if i < 0 or i > 1 (should be power of 2) +*/ +inline float expm1f_scaled_unchecked(float a, float b) { + float f, j, r, s, t, u, v, x, y; + int i; + + // exp(a) = 2**i * exp(f); i = rintf (a / log(2)) + j = ::metal::fma(1.442695f, a, 12582912.f); // 0x1.715476p0, 0x1.8p23 + j = j - 12582912.0f; // 0x1.8p23 + i = (int)j; + f = ::metal::fma(j, -6.93145752e-1f, a); + + // approximate r = exp(f)-1 on interval [-log(2)/2, +log(2)/2] + s = f * f; + if (a == 0.0f) + s = a; // ensure -0 is passed through + // err = 0.997458 ulp1 = 11081805 + r = 1.97350979e-4f; // 0x1.9de000p-13 + r = ::metal::fma(r, f, 1.39309070e-3f); // 0x1.6d30bcp-10 + r = ::metal::fma(r, f, 8.33343994e-3f); // 0x1.1111f6p-7 + r = ::metal::fma(r, f, 4.16668020e-2f); // 0x1.55559ep-5 + r = ::metal::fma(r, f, 1.66666716e-1f); // 0x1.55555cp-3 + r = ::metal::fma(r, f, 4.99999970e-1f); // 0x1.fffffep-2 + u = (j == 1) ? (f + 0.5f) : f; + v = ::metal::fma(r, s, u); + s = 0.5f * b; + t = ::metal::ldexp(s, i); + y = t - s; + x = (t - y) - s; // double-float canonicalization of difference + r = ::metal::fma(v, t, x) + y; + r = r + r; + if (j == 0) + r = v; + if (j == 1) + r = v + v; + return r; +} + +/* Compute exponential base e minus 1. 
max ulp err = 0.99746 */ +inline float expm1f(float a) { + float r; + + r = expm1f_scaled_unchecked(a, 1.0f); + /* handle severe overflow and underflow */ + if (::metal::abs(a - 1.0f) > 88.0f) { + r = ::metal::pow(2, a); + r = ::metal::fma(r, r, -1.0f); + } + return r; +} + +} // namespace metal +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/metal/indexing.h b/phivenv/Lib/site-packages/torch/include/c10/metal/indexing.h new file mode 100644 index 0000000000000000000000000000000000000000..6363c4ddd685f20ba6b249f64f20ebaec9efead5 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/metal/indexing.h @@ -0,0 +1,481 @@ +// Metal indexing primitives +#pragma once +#include +#include +#include + +namespace c10 { +namespace metal { + +// Given coordinates and strides, calculates offset from the start of the +// tensors +template +inline T offset_from_coord( + thread T idx[max_ndim], + constant long* strides, + uint ndim) { + T rc = 0; + for (uint i = 0; i < ndim; ++i) { + rc += idx[i] * T(strides[i]); + } + return rc; +} + +// Given thread index calculates position in the ndim tensor +template +inline void pos_from_thread_index( + T idx, + thread T pos[max_ndim], + constant long* sizes, + uint ndim) { + for (uint i = 0; i < ndim; ++i) { + pos[i] = idx % T(sizes[i]); + idx /= T(sizes[i]); + } +} + +inline long offset_from_thread_index( + long idx, + constant long* sizes, + constant long* strides, + uint ndim) { + long pos[max_ndim]; + pos_from_thread_index(idx, pos, sizes, ndim); + return offset_from_coord(pos, strides, ndim); +} + +template +kernel void unary_dense( + device result_of* output [[buffer(0)]], + constant T* input [[buffer(1)]], + uint index [[thread_position_in_grid]]) { + F f; + output[index] = f(input[index]); +} + +template +kernel void unary_strided( + device result_of* output [[buffer(0)]], + constant T* input [[buffer(1)]], + constant long* sizes [[buffer(2)]], + constant long* input_strides [[buffer(3)]], + constant long* output_strides [[buffer(4)]], + constant uint& ndim [[buffer(5)]], + uint index [[thread_position_in_grid]]) { + F f; + int pos[max_ndim]; + pos_from_thread_index(int(index), pos, sizes, ndim); + const auto input_offs = offset_from_coord(pos, input_strides, ndim); + const auto output_offs = offset_from_coord(pos, output_strides, ndim); + output[output_offs] = f(input[input_offs]); +} + +#define REGISTER_UNARY_OP(NAME, DTYPE0, DTYPE1) \ + static_assert( \ + ::metal:: \ + is_same_v>, \ + "Output dtype mismatch for unary op " #NAME " and input " #DTYPE0); \ + template [[host_name(#NAME "_dense_" #DTYPE1 "_" #DTYPE0)]] kernel void :: \ + c10::metal::unary_dense( \ + device ::c10::metal::result_of * output, \ + constant DTYPE0 * input, \ + uint index); \ + template [[host_name(#NAME "_strided_" #DTYPE1 "_" #DTYPE0)]] kernel void :: \ + c10::metal::unary_strided( \ + device ::c10::metal::result_of * output, \ + constant DTYPE0 * input, \ + constant long* sizes, \ + constant long* input_strides, \ + constant long* output_strides, \ + constant uint& ndim, \ + uint index) + +#define DEFINE_UNARY_FLOATING_FUNCTOR(NAME) \ + struct NAME##_functor { \ + template \ + inline ::metal::enable_if_t<::metal::is_floating_point_v, T> operator()( \ + const T x) { \ + return T(NAME(x)); \ + } \ + template \ + inline ::metal::enable_if_t<::metal::is_integral_v, float> operator()( \ + const T x) { \ + return NAME(static_cast(x)); \ + } \ + } + +template +kernel void unary_alpha_dense( + device result_of* output [[buffer(0)]], + constant T* 
input [[buffer(1)]], + constant T2& alpha [[buffer(2)]], + uint index [[thread_position_in_grid]]) { + F f; + output[index] = f(input[index], alpha); +} + +template +kernel void unary_alpha_strided( + device result_of* output [[buffer(0)]], + constant T* input [[buffer(1)]], + constant long* sizes [[buffer(2)]], + constant long* input_strides [[buffer(3)]], + constant long* output_strides [[buffer(4)]], + constant uint& ndim [[buffer(5)]], + constant T2& alpha [[buffer(6)]], + uint index [[thread_position_in_grid]]) { + F f; + int pos[max_ndim]; + pos_from_thread_index(int(index), pos, sizes, ndim); + const auto input_offs = offset_from_coord(pos, input_strides, ndim); + const auto output_offs = offset_from_coord(pos, output_strides, ndim); + output[output_offs] = f(input[input_offs], alpha); +} + +#define REGISTER_UNARY_ALPHA_OP(NAME, DTYPEI, DTYPEA, DTYPEO) \ + static_assert( \ + ::metal::is_same_v< \ + DTYPEO, \ + ::c10::metal::result_of>, \ + "Output dtype mismatch for unary op " #NAME " and input " #DTYPEI); \ + template [[host_name(#NAME "_dense_" #DTYPEO "_" #DTYPEI \ + "_" #DTYPEA)]] kernel void ::c10::metal:: \ + unary_alpha_dense( \ + device ::c10::metal::result_of * \ + output, \ + constant DTYPEI * input, \ + constant DTYPEA & alpha, \ + uint index); \ + template [[host_name(#NAME "_strided_" #DTYPEO "_" #DTYPEI \ + "_" #DTYPEA)]] kernel void ::c10::metal:: \ + unary_alpha_strided( \ + device ::c10::metal::result_of * \ + output, \ + constant DTYPEI * input, \ + constant long* sizes, \ + constant long* input_strides, \ + constant long* output_strides, \ + constant uint& ndim, \ + constant DTYPEA& alpha, \ + uint index) + +template +inline T val_at_offs(constant void* ptr, long offs) { + return *reinterpret_cast( + static_cast(ptr) + offs); +} + +// Value at offset with dynamic cast from provided type +template +inline T val_at_offs(constant void* ptr, long offs, ScalarType type) { + switch (type) { + case ScalarType::Bool: + return cast_to(val_at_offs(ptr, offs)); + case ScalarType::Byte: + return cast_to(val_at_offs(ptr, offs)); + case ScalarType::Char: + return cast_to(val_at_offs(ptr, offs)); + case ScalarType::Short: + return cast_to(val_at_offs(ptr, offs)); + case ScalarType::Int: + return cast_to(val_at_offs(ptr, offs)); + case ScalarType::Long: + return cast_to(val_at_offs(ptr, offs)); + // Floats + case ScalarType::Float: + return cast_to(val_at_offs(ptr, offs)); + case ScalarType::Half: + return cast_to(val_at_offs(ptr, offs)); +#if __METAL_VERSION__ >= 310 + case ScalarType::BFloat16: + return cast_to(val_at_offs(ptr, offs)); +#endif + // Complex + case ScalarType::ComplexHalf: + return cast_to(val_at_offs(ptr, offs)); + case ScalarType::ComplexFloat: + return cast_to(val_at_offs(ptr, offs)); + } +} + +template +inline device T& ref_at_offs(device void* ptr, long offs) { + return *reinterpret_cast(static_cast(ptr) + offs); +} + +// Binary elementwise ops kernels +// Right now there are 4 flavors available: +// - binary_dense where both input, other and output are dense and share the +// same type +// - binary_strided when all inputs are of the same types, but some elements are +// strided +// - binary_dense_cast - inputs are dense, but of different dtypes +// - binary_strided_cast - inputs or output are strided and of different dtypes +// TODO: Look like binary_dense_scalar are frequently used specialization that +// should be added Pulse 4 variants of the same, but that accept optional +// `alpha` parameter +// (currently only used add/sub/lerp.Scalar) +// Note about 
accuracy (for more info see +// https://github.com/pytorch/pytorch/issues/152736) Sometimes when kernel is +// invoked to produce `half` output, but one of the arguments is float arguments +// should be upcast to float, rather than downcast to half At the moment this is +// expressed with `om_t` optional argument (which stands for opmath_type) which +// is identical to output type but could be something else + +template +kernel void binary_strided( + device void* output [[buffer(0)]], + constant void* input [[buffer(1)]], + constant void* other [[buffer(2)]], + constant long* sizes [[buffer(3)]], + constant long* output_strides [[buffer(4)]], + constant long* input_strides [[buffer(5)]], + constant long* other_strides [[buffer(6)]], + constant uint3& ndim [[buffer(7)]], + uint index [[thread_position_in_grid]]) { + F f; + using res_t = result_of; + int pos[max_ndim]; + pos_from_thread_index(int(index), pos, sizes, ndim.x); + const auto input_offs = offset_from_coord(pos, input_strides, ndim.x); + const auto other_offs = offset_from_coord(pos, other_strides, ndim.x); + const auto output_offs = offset_from_coord(pos, output_strides, ndim.x); + const auto a = val_at_offs(input, input_offs); + const auto b = val_at_offs(other, other_offs); + ref_at_offs(output, output_offs) = + static_cast(f(om_t(a), om_t(b))); +} + +template +kernel void binary_alpha_strided( + device void* output [[buffer(0)]], + constant void* input [[buffer(1)]], + constant void* other [[buffer(2)]], + constant T2& alpha [[buffer(3)]], + constant long* sizes [[buffer(4)]], + constant long* output_strides [[buffer(5)]], + constant long* input_strides [[buffer(6)]], + constant long* other_strides [[buffer(7)]], + constant uint3& ndim [[buffer(8)]], + uint index [[thread_position_in_grid]]) { + F f; + int pos[max_ndim]; + pos_from_thread_index(int(index), pos, sizes, ndim.x); + const auto input_offs = offset_from_coord(pos, input_strides, ndim.x); + const auto other_offs = offset_from_coord(pos, other_strides, ndim.x); + const auto output_offs = offset_from_coord(pos, output_strides, ndim.x); + const auto a = val_at_offs(input, input_offs); + const auto b = val_at_offs(other, other_offs); + ref_at_offs>(output, output_offs) = f(a, b, alpha); +} + +template > +kernel void binary_strided_cast( + device void* output [[buffer(0)]], + constant void* input [[buffer(1)]], + constant void* other [[buffer(2)]], + constant long* sizes [[buffer(3)]], + constant long* output_strides [[buffer(4)]], + constant long* input_strides [[buffer(5)]], + constant long* other_strides [[buffer(6)]], + constant uint4& ndim_types [[buffer(7)]], + uint index [[thread_position_in_grid]]) { + F f; + using res_t = result_of; + int pos[max_ndim]; + pos_from_thread_index(int(index), pos, sizes, ndim_types.x); + const auto input_offs = offset_from_coord(pos, input_strides, ndim_types.x); + const auto other_offs = offset_from_coord(pos, other_strides, ndim_types.x); + const auto output_offs = offset_from_coord(pos, output_strides, ndim_types.x); + const auto a = val_at_offs( + input, input_offs, static_cast(ndim_types.y)); + const auto b = val_at_offs( + other, other_offs, static_cast(ndim_types.z)); + ref_at_offs(output, output_offs) = static_cast(f(a, b)); +} + +template +kernel void binary_alpha_strided_cast( + device void* output [[buffer(0)]], + constant void* input [[buffer(1)]], + constant void* other [[buffer(2)]], + constant T2& alpha [[buffer(3)]], + constant long* sizes [[buffer(4)]], + constant long* output_strides [[buffer(5)]], + constant long* 
input_strides [[buffer(6)]], + constant long* other_strides [[buffer(7)]], + constant uint4& ndim_types [[buffer(8)]], + uint index [[thread_position_in_grid]]) { + F f; + int pos[max_ndim]; + pos_from_thread_index(int(index), pos, sizes, ndim_types.x); + const auto input_offs = offset_from_coord(pos, input_strides, ndim_types.x); + const auto other_offs = offset_from_coord(pos, other_strides, ndim_types.x); + const auto output_offs = offset_from_coord(pos, output_strides, ndim_types.x); + const auto a = + val_at_offs(input, input_offs, static_cast(ndim_types.y)); + const auto b = + val_at_offs(other, other_offs, static_cast(ndim_types.z)); + ref_at_offs>(output, output_offs) = f(a, b, alpha); +} + +template > +kernel void binary_dense( + device result_of* out [[buffer(0)]], + constant T* input [[buffer(1)]], + constant T* other [[buffer(2)]], + uint tid [[thread_position_in_grid]]) { + F f; + using res_t = result_of; + out[tid] = static_cast(f(om_t(input[tid]), om_t(other[tid]))); +} + +template +kernel void binary_alpha_dense( + device result_of* out [[buffer(0)]], + constant T* input [[buffer(1)]], + constant T* other [[buffer(2)]], + constant T2& alpha [[buffer(3)]], + uint tid [[thread_position_in_grid]]) { + F f; + out[tid] = f(input[tid], other[tid], alpha); +} + +template +kernel void binary_dense_cast( + device result_of* out [[buffer(0)]], + constant void* input [[buffer(1)]], + constant void* other [[buffer(2)]], + constant uint4& sizes_types [[buffer(3)]], + uint tid [[thread_position_in_grid]]) { + F f; + using res_t = result_of; + const auto a = val_at_offs( + input, tid * sizes_types.x, static_cast(sizes_types.z)); + const auto b = val_at_offs( + other, tid * sizes_types.y, static_cast(sizes_types.w)); + out[tid] = static_cast(f(a, b)); +} + +template +kernel void binary_alpha_dense_cast( + device result_of* out [[buffer(0)]], + constant void* input [[buffer(1)]], + constant void* other [[buffer(2)]], + constant T2& alpha [[buffer(3)]], + constant uint4& sizes_types [[buffer(4)]], + uint tid [[thread_position_in_grid]]) { + F f; + const auto a = val_at_offs( + input, tid * sizes_types.x, static_cast(sizes_types.z)); + const auto b = val_at_offs( + other, tid * sizes_types.y, static_cast(sizes_types.w)); + out[tid] = f(a, b, alpha); +} + +#define REGISTER_BINARY_OP_(NAME, DTYPEI, DTYPEO, OMT) \ + static_assert( \ + ::metal::is_same_v< \ + DTYPEO, \ + ::c10::metal::result_of>, \ + "Output dtype mismatch for binary op " #NAME " and input " #DTYPEI); \ + template [[host_name(#NAME "_strided_" #DTYPEO "_" #DTYPEI)]] kernel void :: \ + c10::metal::binary_strided( \ + device void* out, \ + constant void* input, \ + constant void* other, \ + constant long* sizes, \ + constant long* output_strides, \ + constant long* input_strides, \ + constant long* other_strides, \ + constant uint3& ndim, \ + uint tid); \ + template [[host_name(#NAME "_strided_cast_" #DTYPEI)]] kernel void ::c10:: \ + metal::binary_strided_cast( \ + device void* out, \ + constant void* input, \ + constant void* other, \ + constant long* sizes, \ + constant long* output_strides, \ + constant long* input_strides, \ + constant long* other_strides, \ + constant uint4& ndim_types, \ + uint tid); \ + template [[host_name(#NAME "_dense_" #DTYPEO "_" #DTYPEI)]] kernel void :: \ + c10::metal::binary_dense( \ + device ::c10::metal::result_of * \ + out_, \ + constant DTYPEI * input_, \ + constant DTYPEI * other_, \ + uint tid); \ + template [[host_name(#NAME "_dense_cast_" #DTYPEI)]] kernel void ::c10:: \ + 
metal::binary_dense_cast( \ + device ::c10::metal::result_of * \ + out_, \ + constant void* input, \ + constant void* other, \ + constant uint4& sizes_types, \ + uint tid) + +// OpMath Binary Op promotes inputs to higher precision type before Functor call +#define REGISTER_OPMATH_BINARY_OP(NAME, DTYPEI, DTYPEO) \ + REGISTER_BINARY_OP_(NAME, DTYPEI, DTYPEO, ::c10::metal::opmath_t) + +#define REGISTER_BINARY_OP(NAME, DTYPEI, DTYPEO) \ + REGISTER_BINARY_OP_(NAME, DTYPEI, DTYPEO, DTYPEI) + +#define REGISTER_BINARY_ALPHA_OP(NAME, DTYPEI, DTYPEA, DTYPEO) \ + static_assert( \ + ::metal::is_same_v< \ + DTYPEO, \ + ::c10::metal::result_of>, \ + "Output dtype mismatch for binary op " #NAME " and input " #DTYPEI); \ + template [[host_name(#NAME "_strided_" #DTYPEO "_" #DTYPEI \ + "_" #DTYPEA)]] kernel void ::c10::metal:: \ + binary_alpha_strided( \ + device void* out, \ + constant void* input, \ + constant void* other, \ + constant DTYPEA& alpha, \ + constant long* sizes, \ + constant long* output_strides, \ + constant long* input_strides, \ + constant long* other_strides, \ + constant uint3& ndim, \ + uint tid); \ + template [[host_name(#NAME "_strided_cast_" #DTYPEI \ + "_" #DTYPEA)]] kernel void ::c10::metal:: \ + binary_alpha_strided_cast( \ + device void* out, \ + constant void* input, \ + constant void* other, \ + constant DTYPEA& alpha, \ + constant long* sizes, \ + constant long* output_strides, \ + constant long* input_strides, \ + constant long* other_strides, \ + constant uint4& ndim_types, \ + uint tid); \ + template [[host_name(#NAME "_dense_" #DTYPEO "_" #DTYPEI \ + "_" #DTYPEA)]] kernel void ::c10::metal:: \ + binary_alpha_dense( \ + device ::c10::metal:: \ + result_of * \ + out_, \ + constant DTYPEI * input_, \ + constant DTYPEI * other_, \ + constant DTYPEA & alpha, \ + uint tid); \ + template \ + [[host_name(#NAME "_dense_cast_" #DTYPEI "_" #DTYPEA)]] kernel void :: \ + c10::metal::binary_alpha_dense_cast( \ + device ::c10::metal:: \ + result_of * \ + out_, \ + constant void* input, \ + constant void* other, \ + constant DTYPEA& alpha, \ + constant uint4& sizes_types, \ + uint tid) +} // namespace metal +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/metal/random.h b/phivenv/Lib/site-packages/torch/include/c10/metal/random.h new file mode 100644 index 0000000000000000000000000000000000000000..a70a5455fd907c4003332504cf2c82ebb1987771 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/metal/random.h @@ -0,0 +1,78 @@ +// Philox Counter based RNG implementation for Metal +// Borrowed from aten/src/ATen/core/PhiloxRNGEngine.h +// Which in turn borrowed from +// http://www.thesalmons.org/john/random123/papers/random123sc11.pdf +#pragma once +#include + +namespace c10 { +namespace metal { + +namespace detail { + +constexpr float uint32_to_uniform_float(uint32_t value) { + // maximum value such that `MAX_INT * scale < 1.0` (with float rounding) + constexpr float scale = 4.6566127342e-10; + return static_cast(value & 0x7FFFFFFF) * scale; +} + +inline uint2 splitlong(ulong v) { + return uint2(v >> 32, v & 0xffffffff); +} + +} // namespace detail + +namespace philox4 { + +uint2 mulhilo(uint a, uint b) { + auto rc = static_cast(a) * b; + return detail::splitlong(rc); +} +uint4 single_round(uint4 ctr, uint2 key) { + constexpr uint kPhiloxSA = 0xD2511F53; + constexpr uint kPhiloxSB = 0xCD9E8D57; + auto rc0 = mulhilo(kPhiloxSA, ctr.x); + auto rc1 = mulhilo(kPhiloxSB, ctr.z); + return uint4(rc1.y ^ ctr.y ^ key.x, rc1.x, rc0.y ^ ctr.w ^ key.y, rc0.x); +} + 
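+// A single_round call performs one Philox-4x32 round: two 32x32->64-bit
+// multiplies on counter words (split back into hi/lo halves by mulhilo),
+// xored with the remaining counter words and the key. multiple_rounds below
+// repeats this while bumping the key by the Weyl constants kPhilox10 each
+// iteration, and rand() further down drives it with a round count of 10, the
+// philox4x32-10 configuration from the Random123 paper referenced above.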
+uint4 multiple_rounds(uint4 ctr, uint2 key, uint rounds) { + constexpr uint2 kPhilox10 = {0x9E3779B9, 0xBB67AE85}; + for (uint round = 0; round < rounds - 1; ++round) { + ctr = single_round(ctr, key); + key += kPhilox10; + } + return ctr; +} + +uint4 rand(long seed, long index) { + uint4 ctr = 0; + ctr.zw = detail::splitlong(index); + return multiple_rounds(ctr, detail::splitlong(seed), 10); +} + +} // namespace philox4 + +float randn(long seed, long index) { + auto value = philox4::rand(seed, index); + float u1 = 1.0 - detail::uint32_to_uniform_float(value.x); + float u2 = 1.0 - detail::uint32_to_uniform_float(value.y); + return ::metal::sqrt(-2.0 * ::metal::log(u1)) * + ::metal::cos(2.0 * M_PI_F * u2); +} + +float rand(long seed, long index) { + auto value = philox4::rand(seed, index); + return detail::uint32_to_uniform_float(value.x); +} + +long randint64(long seed, long index, long low, long high) { + auto range = high - low; + auto value = philox4::rand(seed, index); + // TODO: Implement better algorithm for large ranges + return low + + static_cast(detail::uint32_to_uniform_float(value.x) * range); +} + +} // namespace metal +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/metal/reduction_utils.h b/phivenv/Lib/site-packages/torch/include/c10/metal/reduction_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..613636be03220ad539d05994522622086004ae68 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/metal/reduction_utils.h @@ -0,0 +1,177 @@ +#pragma once + +#include +#include + +namespace c10 { +namespace metal { + +constant constexpr ushort simdgroup_size = 32; + +template +inline ::metal::enable_if_t, T> simd_sum(T val) { + return ::metal::simd_sum(val); +} + +template +inline ::metal::enable_if_t, T> simd_prod(T val) { + return ::metal::simd_product(val); +} + +// Metal does not support SIMD reductions over 64-bit types, but it could be +// implement using simd_shuffle_down, that yields result in log2(simdgroup_size) +// iterations Use fill variant, as shuffle down returns garbage if inactive +// thread is referenced (on M1/M2, works fine on M4) and broadcast result to all +// threads in the end. Implementation heavily borrows from +// https://github.com/ml-explore/mlx/blob/86389bf9707f46101af45d90510e8e97c8a90b93/mlx/backend/metal/kernels/reduction/ops.h#L16 +template +inline ::metal::enable_if_t<::metal::is_same_v, T> simd_sum(T val) { + for (ushort i = simdgroup_size / 2; i > 0; i /= 2) { + val += as_type( + ::metal::simd_shuffle_and_fill_down(as_type(val), int2(0), i)); + } + return as_type(::metal::simd_broadcast(as_type(val), 0)); +} + +template +inline ::metal::enable_if_t<::metal::is_same_v, T> simd_prod(T val) { + for (ushort i = simdgroup_size / 2; i > 0; i /= 2) { + val *= as_type( + ::metal::simd_shuffle_and_fill_down(as_type(val), int2(0), i)); + } + return as_type(::metal::simd_broadcast(as_type(val), 0)); +} + +// Below algorithms are written with hardcoded assumption that simdgroup is 32 +// and threadgroup_max is 1024, i.e. 
reduction can be done in two stages max +template +opmath_t threadgroup_sum( + threadgroup opmath_t* data, + T val, + unsigned idx, + unsigned size) { + auto rc = simd_sum(static_cast>(val)); + if (idx % simdgroup_size == 0) { + data[idx / simdgroup_size] = rc; + } + if (size > simdgroup_size) { + ::metal::threadgroup_barrier(::metal::mem_flags::mem_threadgroup); + if (idx < ((size + simdgroup_size - 1) / simdgroup_size)) { + auto rc1 = simd_sum(data[idx]); + if (idx == 0) { + data[0] = rc1; + } + } + } + ::metal::threadgroup_barrier(::metal::mem_flags::mem_threadgroup); + return data[0]; +} + +template +opmath_t threadgroup_prod( + threadgroup opmath_t* data, + T val, + unsigned idx, + unsigned size) { + auto rc = simd_prod(static_cast>(val)); + if (idx % simdgroup_size == 0) { + data[idx / simdgroup_size] = rc; + } + if (size > simdgroup_size) { + ::metal::threadgroup_barrier(::metal::mem_flags::mem_threadgroup); + if (idx < ((size + simdgroup_size - 1) / simdgroup_size)) { + auto rc1 = simd_prod(data[idx]); + if (idx == 0) { + data[0] = rc1; + } + } + } + ::metal::threadgroup_barrier(::metal::mem_flags::mem_threadgroup); + return data[0]; +} + +template +float3 threadgroup_welford_reduce(threadgroup T* data, unsigned size) { + ::metal::threadgroup_barrier(::metal::mem_flags::mem_threadgroup); + float m = data[0]; + float m2 = 0; + for (unsigned idx = 1; idx < size; ++idx) { + float delta = data[idx] - m; + m += delta / (idx + 1); + m2 += delta * (data[idx] - m); + } + return float3(m, m2, size); +} + +// Each vec3type is tuple of mean, m2 and weight +template +float3 welford_combine(T a, T b) { + float delta = b.x - a.x; + float new_weight = a.z + b.z; + auto w2_over_w = new_weight != 0 ? b.z / new_weight : 0.0; + return float3( + a.x + delta * w2_over_w, + a.y + b.y + delta * delta * a.z * w2_over_w, + new_weight); +} + +template +float3 threadgroup_welford_combine(threadgroup T* data, unsigned size) { + ::metal::threadgroup_barrier(::metal::mem_flags::mem_threadgroup); + float3 rc = data[0]; + for (unsigned idx = 1; idx < size; ++idx) { + rc = welford_combine(rc, data[idx]); + } + return rc; +} + +template +T threadgroup_max(threadgroup T* data, unsigned size) { + // TODO: This should be moved to the callee + ::metal::threadgroup_barrier(::metal::mem_flags::mem_threadgroup); + T rc = data[0]; + for (unsigned idx = 1; idx < size; ++idx) { + rc = ::c10::metal::max(rc, data[idx]); + } + return rc; +} + +template +T threadgroup_min(threadgroup T* data, unsigned size) { + // TODO: This should be moved to the callee + ::metal::threadgroup_barrier(::metal::mem_flags::mem_threadgroup); + T rc = data[0]; + for (unsigned idx = 1; idx < size; ++idx) { + rc = ::c10::metal::min(rc, data[idx]); + } + return rc; +} + +template +int threadgroup_argmax(threadgroup T* data, unsigned size) { + // TODO: This should be moved to the callee + ::metal::threadgroup_barrier(::metal::mem_flags::mem_threadgroup); + int rc = 0; + for (unsigned idx = 1; idx < size; ++idx) { + if (data[idx] > data[rc]) { + rc = idx; + } + } + return rc; +} + +template +int threadgroup_argmin(threadgroup T* data, unsigned size) { + // TODO: This should be moved to the callee + ::metal::threadgroup_barrier(::metal::mem_flags::mem_threadgroup); + int rc = 0; + for (unsigned idx = 1; idx < size; ++idx) { + if (data[idx] < data[rc]) { + rc = idx; + } + } + return rc; +} + +} // namespace metal +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/metal/special_math.h 
b/phivenv/Lib/site-packages/torch/include/c10/metal/special_math.h new file mode 100644 index 0000000000000000000000000000000000000000..7d43c32ec5748cb160280e06eb016bc94728205e --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/metal/special_math.h @@ -0,0 +1,1794 @@ +// Implementation of specal math functions for Metal +#pragma once +#include +#include +#include + +namespace c10 { +namespace metal { + +/* + * Approximation to the error function. + * Based on code from: + * https://stackoverflow.com/questions/35148198/efficient-faithfully-rounded-implementation-of-error-function-erff#answer-35148199 + * Copy-n-pasted from + * https://github.com/ml-explore/mlx/blob/2e8cf0b4506c200a5c2d199ecbbf655fdf4c2ce2/mlx/backend/metal/kernels/erf.h#L11 + */ +template +inline float erf(T x) { + const auto a = static_cast(x); + const auto t = ::metal::abs(a); + const auto s = a * a; + if (t > 0.927734375f) { + // maximum error 0.99527 ulp + auto r = ::metal::fma( + -1.72853470e-5f, t, 3.83197126e-4f); // -0x1.220000p-16,0x1.91cfb2p-12 + const auto u = ::metal::fma( + -3.88396438e-3f, t, 2.42546219e-2f); // -0x1.fd1438p-9, 0x1.8d6342p-6 + r = ::metal::fma(r, s, u); + r = ::metal::fma(r, t, -1.06777877e-1f); // -0x1.b55cb8p-4 + r = ::metal::fma(r, t, -6.34846687e-1f); // -0x1.450aa0p-1 + r = ::metal::fma(r, t, -1.28717512e-1f); // -0x1.079d0cp-3 + r = ::metal::fma(r, t, -t); + // TODO, replace with expm1 when implemented + r = 1.0f - ::metal::exp(r); + r = ::metal::copysign(r, a); + return r; + } + + // maximum error 0.98929 ulp + auto r = -5.96761703e-4f; // -0x1.38e000p-11 + r = ::metal::fma(r, s, 4.99119423e-3f); // 0x1.471a58p-8 + r = ::metal::fma(r, s, -2.67681349e-2f); // -0x1.b691b2p-6 + r = ::metal::fma(r, s, 1.12819925e-1f); // 0x1.ce1c44p-4 + r = ::metal::fma(r, s, -3.76125336e-1f); // -0x1.812700p-2 + r = ::metal::fma(r, s, 1.28379166e-1f); // 0x1.06eba8p-3 + r = ::metal::fma(r, a, a); + return r; +} + +template +inline float erfinv(T y) { + /* coefficients in rational expansion */ + constexpr float a[4] = {0.886226899, -1.645349621, 0.914624893, -0.140543331}; + constexpr float b[4] = {-2.118377725, 1.442710462, -0.329097515, 0.012229801}; + constexpr float c[4] = {-1.970840454, -1.624906493, 3.429567803, 1.641345311}; + constexpr float d[2] = {3.543889200, 1.637067800}; + + float x, z, num, dem; /*working variables */ + + float y_abs = ::metal::abs(static_cast(y)); + if (y_abs >= 1.0f) { + return y_abs > 1.0f ? NAN + : ::metal::copysign(INFINITY, static_cast(y)); + } + if (y_abs <= 0.7f) { + z = y * y; + num = ((a[3] * z + a[2]) * z + a[1]) * z + a[0]; + dem = (((b[3] * z + b[2]) * z + b[1]) * z + b[0]) * z + 1.0f; + x = y * num / dem; + } else { + z = ::metal::sqrt(-1.0f * ::metal::log((1.0 - y_abs) / 2.0)); + num = ((c[3] * z + c[2]) * z + c[1]) * z + c[0]; + dem = (d[1] * z + d[0]) * z + 1.0f; + x = ::metal::copysign(num, static_cast(y)) / dem; + } + + return x; +} + +/* + * For licensing information and documentation, please refer to the cpu + * implementation located in "ATen/native/Math.h". 
+ */ + +template +inline T chbevl(T x, const float array[], const int len) { + T b0, b1, b2; + + b0 = array[0]; + b1 = 0; + + for (int i = 1; i < len; ++i) { + b2 = b1; + b1 = b0; + b0 = x * b1 - b2 + array[i]; + } + + return T{0.5} * (b0 - b2); +} + +// Copied from +// https://github.com/pytorch/pytorch/blob/58b661cda2c002a8e1ac3bee494bfe1f7420437c/aten/src/ATen/native/cuda/Math.cuh#L502 + +template +inline T i0(T _x) { + auto x = ::metal::fabs(_x); + + if (x <= 8.0) { + /* Chebyshev coefficients for exp(-x) I0(x) + * in the interval [0,8]. + * + * lim(x->0){ exp(-x) I0(x) } = 1. + */ + constexpr float A[] = { + -4.41534164647933937950E-18, 3.33079451882223809783E-17, + -2.43127984654795469359E-16, 1.71539128555513303061E-15, + -1.16853328779934516808E-14, 7.67618549860493561688E-14, + -4.85644678311192946090E-13, 2.95505266312963983461E-12, + -1.72682629144155570723E-11, 9.67580903537323691224E-11, + -5.18979560163526290666E-10, 2.65982372468238665035E-9, + -1.30002500998624804212E-8, 6.04699502254191894932E-8, + -2.67079385394061173391E-7, 1.11738753912010371815E-6, + -4.41673835845875056359E-6, 1.64484480707288970893E-5, + -5.75419501008210370398E-5, 1.88502885095841655729E-4, + -5.76375574538582365885E-4, 1.63947561694133579842E-3, + -4.32430999505057594430E-3, 1.05464603945949983183E-2, + -2.37374148058994688156E-2, 4.93052842396707084878E-2, + -9.49010970480476444210E-2, 1.71620901522208775349E-1, + -3.04682672343198398683E-1, 6.76795274409476084995E-1}; + + auto y = (x / 2.0) - 2.0; + return static_cast(::metal::exp(x) * chbevl(y, A, 30)); + } + + // Handles x > 8 case + /* Chebyshev coefficients for exp(-x) sqrt(x) I0(x) + * in the inverted interval [8,infinity]. + * + * lim(x->inf){ exp(-x) sqrt(x) I0(x) } = 1/sqrt(2pi). + */ + constexpr float B[] = { + -7.23318048787475395456E-18, -4.83050448594418207126E-18, + 4.46562142029675999901E-17, 3.46122286769746109310E-17, + -2.82762398051658348494E-16, -3.42548561967721913462E-16, + 1.77256013305652638360E-15, 3.81168066935262242075E-15, + -9.55484669882830764870E-15, -4.15056934728722208663E-14, + 1.54008621752140982691E-14, 3.85277838274214270114E-13, + 7.18012445138366623367E-13, -1.79417853150680611778E-12, + -1.32158118404477131188E-11, -3.14991652796324136454E-11, + 1.18891471078464383424E-11, 4.94060238822496958910E-10, + 3.39623202570838634515E-9, 2.26666899049817806459E-8, + 2.04891858946906374183E-7, 2.89137052083475648297E-6, + 6.88975834691682398426E-5, 3.36911647825569408990E-3, + 8.04490411014108831608E-1}; + + return static_cast( + (::metal::exp(x) * chbevl(32.0 / x - 2.0, B, 25)) / ::metal::sqrt(x)); +} + +template +inline T i0e(T _x) { + auto x = ::metal::fabs(_x); + + if (x <= 8.0) { + constexpr float coefficients[] = { + -4.41534164647933937950E-18, 3.33079451882223809783E-17, + -2.43127984654795469359E-16, 1.71539128555513303061E-15, + -1.16853328779934516808E-14, 7.67618549860493561688E-14, + -4.85644678311192946090E-13, 2.95505266312963983461E-12, + -1.72682629144155570723E-11, 9.67580903537323691224E-11, + -5.18979560163526290666E-10, 2.65982372468238665035E-9, + -1.30002500998624804212E-8, 6.04699502254191894932E-8, + -2.67079385394061173391E-7, 1.11738753912010371815E-6, + -4.41673835845875056359E-6, 1.64484480707288970893E-5, + -5.75419501008210370398E-5, 1.88502885095841655729E-4, + -5.76375574538582365885E-4, 1.63947561694133579842E-3, + -4.32430999505057594430E-3, 1.05464603945949983183E-2, + -2.37374148058994688156E-2, 4.93052842396707084878E-2, + -9.49010970480476444210E-2, 1.71620901522208775349E-1, + 
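// chbevl above is the Clenshaw-style series evaluator from Cephes: b0 = x*b1 - b2 + c[i],
// returning (b0 - b2)/2. A plain-C++ reference of the same loop (illustrative only), useful
// for checking the Metal port on the host:
//
//   float chbevl_ref(float x, const float* c, int len) {
//     float b0 = c[0], b1 = 0.f, b2 = 0.f;
//     for (int i = 1; i < len; ++i) {
//       b2 = b1;
//       b1 = b0;
//       b0 = x * b1 - b2 + c[i];
//     }
//     return 0.5f * (b0 - b2);
//   }
//
// i0/i0e first map their argument into the series domain: chbevl(x/2 - 2, A, 30) for x <= 8
// and chbevl(32/x - 2, B, 25) otherwise. Approximate spot values: i0(1) is about 1.2660659 and
// i0e(1) = exp(-1) * i0(1) is about 0.4657596.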
-3.04682672343198398683E-1, 6.76795274409476084995E-1}; + + auto y = (x / 2.0) - 2.0; + return static_cast(chbevl(y, coefficients, int{30})); + } + + // x > 8 + constexpr float coefficients[] = { + -7.23318048787475395456E-18, -4.83050448594418207126E-18, + 4.46562142029675999901E-17, 3.46122286769746109310E-17, + -2.82762398051658348494E-16, -3.42548561967721913462E-16, + 1.77256013305652638360E-15, 3.81168066935262242075E-15, + -9.55484669882830764870E-15, -4.15056934728722208663E-14, + 1.54008621752140982691E-14, 3.85277838274214270114E-13, + 7.18012445138366623367E-13, -1.79417853150680611778E-12, + -1.32158118404477131188E-11, -3.14991652796324136454E-11, + 1.18891471078464383424E-11, 4.94060238822496958910E-10, + 3.39623202570838634515E-9, 2.26666899049817806459E-8, + 2.04891858946906374183E-7, 2.89137052083475648297E-6, + 6.88975834691682398426E-5, 3.36911647825569408990E-3, + 8.04490411014108831608E-1}; + + return static_cast( + chbevl(32.0 / x - 2.0, coefficients, 25) / ::metal::sqrt(x)); +} + +// Copied from +// https://github.com/pytorch/pytorch/blob/58b661cda2c002a8e1ac3bee494bfe1f7420437c/aten/src/ATen/native/cuda/Math.cuh#L576 + +template +inline T i1(T _x) { + const auto x = ::metal::fabs(_x); + + if (x <= 8.0) { + // Chebyshev coefficients for exp(-x) i1(x) in the internal [0, 8] + // lim(x->0){ exp(-x) i1(x) / x } = 1/2 + constexpr float coefficients[] = { + 2.77791411276104639959E-18, -2.11142121435816608115E-17, + 1.55363195773620046921E-16, -1.10559694773538630805E-15, + 7.60068429473540693410E-15, -5.04218550472791168711E-14, + 3.22379336594557470981E-13, -1.98397439776494371520E-12, + 1.17361862988909016308E-11, -6.66348972350202774223E-11, + 3.62559028155211703701E-10, -1.88724975172282928790E-9, + 9.38153738649577178388E-9, -4.44505912879632808065E-8, + 2.00329475355213526229E-7, -8.56872026469545474066E-7, + 3.47025130813767847674E-6, -1.32731636560394358279E-5, + 4.78156510755005422638E-5, -1.61760815825896745588E-4, + 5.12285956168575772895E-4, -1.51357245063125314899E-3, + 4.15642294431288815669E-3, -1.05640848946261981558E-2, + 2.47264490306265168283E-2, -5.29459812080949914269E-2, + 1.02643658689847095384E-1, -1.76416518357834055153E-1, + 2.52587186443633654823E-1}; + const auto y = x / 2.0 - 2.0; + const auto out = ::metal::exp(x) * x * chbevl(y, coefficients, 29); + return static_cast(_x < T(0.) ? -out : out); + } + + // Chebyshev coefficients for exp(-x) sqrt(x) i1(x) + // in the inverted interval [8, infinity] + // lim(x->inf){ exp(-x) sqrt(x) i1(x) } = 1/sqrt(2pi) + constexpr float coefficients[] = { + 7.51729631084210481353E-18, 4.41434832307170791151E-18, + -4.65030536848935832153E-17, -3.20952592199342395980E-17, + 2.96262899764595013876E-16, 3.30820231092092828324E-16, + -1.88035477551078244854E-15, -3.81440307243700780478E-15, + 1.04202769841288027642E-14, 4.27244001671195135429E-14, + -2.10154184277266431302E-14, -4.08355111109219731823E-13, + -7.19855177624590851209E-13, 2.03562854414708950722E-12, + 1.41258074366137813316E-11, 3.25260358301548823856E-11, + -1.89749581235054123450E-11, -5.58974346219658380687E-10, + -3.83538038596423702205E-9, -2.63146884688951950684E-8, + -2.51223623787020892529E-7, -3.88256480887769039346E-6, + -1.10588938762623716291E-4, -9.76109749136146840777E-3, + 7.78576235018280120474E-1}; + const auto out = (::metal::exp(x) * chbevl(32. / x - 2., coefficients, 25)) / + ::metal::sqrt(x); + return static_cast(_x < T(0.) ? 
-out : out); +} + +template +inline T i1e(T _x) { + const auto x = ::metal::fabs(_x); + if (x <= 8.0) { + // Chebyshev double coefficients for exp(-x) i1(x) in the interval [0,8]. + // Note: lim(x->0){ exp(-x) i1(x) / x } = 1/2. + constexpr float coefficients[] = { + 9.38153738649577178388E-9f, + -4.44505912879632808065E-8f, + 2.00329475355213526229E-7f, + -8.56872026469545474066E-7f, + 3.47025130813767847674E-6f, + -1.32731636560394358279E-5f, + 4.78156510755005422638E-5f, + -1.61760815825896745588E-4f, + 5.12285956168575772895E-4f, + -1.51357245063125314899E-3f, + 4.15642294431288815669E-3f, + -1.05640848946261981558E-2f, + 2.47264490306265168283E-2f, + -5.29459812080949914269E-2f, + 1.02643658689847095384E-1f, + -1.76416518357834055153E-1f, + 2.52587186443633654823E-1f}; + const auto y = x / 2.0 - 2.0; + const auto out = chbevl(y, coefficients, 17) * x; + return static_cast(_x < 0. ? -out : out); + } + + // Chebyshev coefficients for exp(-x) sqrt(x) i1(x) + // in the inverted interval (8, infinity]. + // Note: lim(x->inf){ exp(-x) sqrt(x) i1(x) } = 1/sqrt(2pi). + // TODO: what's an "inverted interval"? Open on the left + // and closed on the right? + constexpr float coefficients[] = { + -3.83538038596423702205E-9f, + -2.63146884688951950684E-8f, + -2.51223623787020892529E-7f, + -3.88256480887769039346E-6f, + -1.10588938762623716291E-4f, + -9.76109749136146840777E-3f, + 7.78576235018280120474E-1f}; + + const auto out = + chbevl(32. / x - 2., coefficients, 7) / ::metal::precise::sqrt(x); + return static_cast(_x < 0. ? -out : out); +} + +// gamma, lgamma +template +inline float log_gamma(const T); + +template +inline float gamma(const T x) { + if (x < 0.001) { + constexpr float EULER_MASCHERONI = 0.577215664901532860606512090; + // For small x, 1/gamma(x) has power series x + gamma x^2 - ... + // So in this range, 1/gamma(x) = x + gamma x^2 with error on the order of + // x^3. The relative error over this interval is less than 6e-7. + + return 1.0 / (x * (1.0 + EULER_MASCHERONI * x)); + } + if (x >= 12.0) { + return ::metal::exp(log_gamma(x)); + } + // The algorithm directly approximates gamma over (1,2) and uses + // reduction identities to reduce other arguments to this interval. + // numerator coefficients for gamma approximation over the interval (1,2) + constexpr float GAMMA_NUMERATOR_COEF[8] = { + -1.71618513886549492533811E+0, + 2.47656508055759199108314E+1, + -3.79804256470945635097577E+2, + 6.29331155312818442661052E+2, + 8.66966202790413211295064E+2, + -3.14512729688483675254357E+4, + -3.61444134186911729807069E+4, + 6.64561438202405440627855E+4}; + + // denominator coefficients for gamma approximation over the interval (1,2) + constexpr float GAMMA_DENOMINATOR_COEF[8] = { + -3.08402300119738975254353E+1, + 3.15350626979604161529144E+2, + -1.01515636749021914166146E+3, + -3.10777167157231109440444E+3, + 2.25381184209801510330112E+4, + 4.75584627752788110767815E+3, + -1.34659959864969306392456E+5, + -1.15132259675553483497211E+5}; + + // Add or subtract integers as necessary to bring y into (1,2) + float y = 1.0 + ::metal::fract(x); + + float num = 0.0; + float den = 1.0; + + float z = y - 1; + for (int i = 0; i < 8; i++) { + num = (num + GAMMA_NUMERATOR_COEF[i]) * z; + den = den * z + GAMMA_DENOMINATOR_COEF[i]; + } + float result = num / den + 1.0; + + // Apply correction if argument was not initially in (1,2) + if (x < 1.0) { + // identity gamma(z) = gamma(z+1)/z + result /= (y - 1.0); + } else { + // identity gamma(z+n) = z*(z+1)* ... 
*(z+n-1)*gamma(z) + auto n = static_cast(::metal::floor(x)); + for (int i = 1; i < n; i++) { + result *= y++; + } + } + + return result; +} + +template +inline float log_gamma(const T x) { + constexpr float LOG_PI = 1.14472988584940017414342735135305; + constexpr float HALF_LOG_TWO_PI = 0.91893853320467274178032973640562; + constexpr float LGAMMA_EXPANSION_COEF[8] = { + 1.0 / 12.0, + -1.0 / 360.0, + 1.0 / 1260.0, + -1.0 / 1680.0, + 1.0 / 1188.0, + -691.0 / 360360.0, + 1.0 / 156.0, + -3617.0 / 122400.0}; + + float rc; + + const auto abs_x = ::metal::abs(static_cast(x)); + if (abs_x == 0) { + return INFINITY; + } + if (abs_x < 12.0) { + rc = ::metal::log(::metal::abs(gamma(abs_x))); + } else { + // Abramowitz and Stegun 6.1.41 + // Asymptotic series should be good to at least 11 or 12 figures + // For error analysis, see Whittiker and Watson + // A Course in Modern Analysis (1927), page 252 + + float z = 1.0 / (abs_x * abs_x); + float sum = LGAMMA_EXPANSION_COEF[7]; + + for (int i = 6; i >= 0; i--) { + sum *= z; + sum += LGAMMA_EXPANSION_COEF[i]; + } + float series = sum / abs_x; + + rc = (abs_x - 0.5) * ::metal::log(abs_x) - abs_x + HALF_LOG_TWO_PI + series; + } + + if (x >= 0) { + return rc; + } + + // Reflection formula + // Compute arg first to workaround Metal compiler bgg of sorts on M4 + // See https://github.com/pytorch/pytorch/pull/145740 for more details + auto log_arg = abs_x * ::metal::abs(::metal::sinpi(abs_x)); + return LOG_PI - rc - ::metal::log(log_arg); +} + +inline float zeta(float x, float q) { + constexpr float MACHEP = 1.11022302462515654042E-16; + constexpr float ZETA_EXPANSION[] = { + 12.0, + -720.0, + 30240.0, + -1209600.0, + 47900160.0, + -1.8924375803183791606e9, + 7.47242496e10, + -2.950130727918164224e12, + 1.1646782814350067249e14, + -4.5979787224074726105e15, + 1.8152105401943546773e17, + -7.1661652561756670113e18}; + if (x == 1.0f) { + return INFINITY; + } + + if (x < 1.0f) { + return NAN; + } + + if (q <= 0.0f) { + if (q == ::metal::trunc(q)) { + return INFINITY; + } + if (x != ::metal::trunc(x)) { + return NAN; + } + } + + float s = ::metal::pow(q, -x); + float a = q; + int i = 0; + float b = 0.0f; + while ((i < 9) || (a <= 9.0f)) { + i += 1; + a += 1.0f; + b = ::metal::pow(a, -x); + s += b; + if ((-MACHEP * s < b) && (b < MACHEP * s)) { + return s; + } + } + + float w = a; + s += b * w / (x - 1.0f); + s -= 0.5f * b; + a = 1.0f; + float t; + float k = 0.0f; + for (int i = 0; i < 12; i++) { + a *= x + k; + b /= w; + t = a * b / ZETA_EXPANSION[i]; + s += t; + t = ::metal::fabs(t / s); + if (t < MACHEP) { + return s; + } + k += 1.0f; + a *= x + k; + b /= w; + k += 1.0f; + } + return s; +} + +inline float calc_digamma_positive_domain(float x) { + constexpr float DIGAMMA_COEF[7] = { + 8.33333333333333333333E-2, + -2.10927960927960927961E-2, + 7.57575757575757575758E-3, + -4.16666666666666666667E-3, + 3.96825396825396825397E-3, + -8.33333333333333333333E-3, + 8.33333333333333333333E-2, + }; + + // Push x to be >= 10 + float result = 0; + while (x < 10) { + result -= 1 / x; + x += 1; + } + if (x == 10) { + constexpr float PSI_10 = 2.25175258906672110764; + return result + PSI_10; + } + + // Compute asymptotic digamma + float y = 0; + if (x < 1.0E+17) { + float z = 1.0 / (x * x); + for (int i = 0; i <= 6; i++) { + y += ::metal::pow(z, i) * DIGAMMA_COEF[i]; + } + y *= z; + } + return result + ::metal::log(x) - (0.5 / x) - y; +} + +template +inline float digamma(T0 x) { + if (x < 0.0f) { + if (x == ::metal::trunc(x)) { + // As per C++ standard for gamma related 
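// Worked check of the reflection branch in log_gamma above: for x = -0.5, abs_x = 0.5 gives
// rc = log_gamma(0.5) = log(sqrt(pi)), about 0.5723649, and
// LOG_PI - rc - log(0.5 * |sinpi(0.5)|) = 1.1447299 - 0.5723649 + 0.6931472, about 1.2655122,
// which matches log|Gamma(-0.5)| = log(2*sqrt(pi)) because Gamma(-0.5) = -2*sqrt(pi).
// Approximate spot values for the other helpers here: digamma(1) = -EULER_MASCHERONI, about
// -0.5772157, and zeta(2, 1) = pi^2/6, about 1.6449341; polygamma(1, 1) returns the same value
// through the identity psi^(n)(x) = (-1)^(n+1) * n! * zeta(n+1, x).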
functions and SciPy, + // If the argument is a negative integer, NaN is returned + return NAN; + } else { + // Extracts the fractional part of x as r, since tan(pi * r) is more + // numerically accurate than tan(pi * x). While these operations are + // mathematically equivalent since both x and r are in radians and tan() + // has a periodicity of pi, in practice the computation of pi * x is a + // source of error (when |x| > 1). + float r = ::metal::fract(x); + return calc_digamma_positive_domain(1.0f - x) - + M_PI_F / ::metal::tan(M_PI_F * r); + } + } else if (x == 0.0f) { + // As per C++ standard for gamma related functions and SciPy, + // If the argument is ±0, ±∞ is returned + return ::metal::copysign(INFINITY, static_cast(-x)); + } else { + return calc_digamma_positive_domain(x); + } +} + +template +inline float polygamma(const int64_t order, const T0 input) { + // Filter out n == 0. + if (order == 0) { + return digamma(input); + } + + float x = input; + float n = order; + float sgn = ((order % 2) ? 1 : -1); + return sgn * gamma(n + 1) * zeta(n + 1, x); +} + +template +inline ::metal::enable_if_t, T> sinc(T a) { + if (a == static_cast(0)) { + return static_cast(1); + } + auto product = M_PI_F * static_cast(a); + return static_cast(::metal::precise::sin(product) / product); +} + +// Complex sinc2 implementation +template +inline ::metal::enable_if_t, T> sinc(T inp) { + auto a = static_cast(inp) * M_PI_F; + const float a2 = a.x * a.x + a.y * a.y; + if (a2 == 0) { + return 0; + } + float cosx; + float sinx = ::metal::sincos(a.x, cosx); + float sinhy = ::metal::sinh(a.y); + float coshy = ::metal::cosh(a.y); + auto re = sinx * coshy * a.x + cosx * sinhy * a.y; + auto im = cosx * sinhy * a.x - sinx * coshy * a.y; + return T(re, im) / a2; +} + +template +inline T spherical_bessel_j0(T x) { + if (::metal::isinf(x)) + return T(0.0); + T x2 = x * x; + T k1 = static_cast(-1.0); + T k2 = static_cast(1.0); + + if (::metal::fabs(static_cast(x)) < T(0.5)) { + return T(1.0) + + x2 * + (k1 / T(6.0) + + x2 * + (k2 / T(120.0) + + x2 * + (k1 / T(5040.0) + + x2 * + (k2 / T(362880.0) + + x2 * + (k1 / T(39916800.0) + + x2 * (k2 / T(6227020800.0))))))); + } + + return static_cast(::metal::sin(x) / x); +} + +template +inline float xlog1py(T x, T y) { + if (::metal::isnan(y)) { + return NAN; + } + + if (x == 0) { + return x; + } + + return x * ::c10::metal::log1p(y); +} + +template +inline T entr(T a) { + if (a != a) { + return a; + } + + if (a > 0) { + return static_cast(-a * ::metal::log(a)); + } + + if (a == 0) { + return 0; + } + + return static_cast(-INFINITY); +} + +// Copy-n-paste from aten/src/ATen/native/cuda/Math.cuh lines 1463-1915 +template +inline float bessel_j0_forward(T x) { + constexpr float PP[] = { + +7.96936729297347051624e-04, + +8.28352392107440799803e-02, + +1.23953371646414299388e+00, + +5.44725003058768775090e+00, + +8.74716500199817011941e+00, + +5.30324038235394892183e+00, + +9.99999999999999997821e-01, + }; + + constexpr float PQ[] = { + +9.24408810558863637013e-04, + +8.56288474354474431428e-02, + +1.25352743901058953537e+00, + +5.47097740330417105182e+00, + +8.76190883237069594232e+00, + +5.30605288235394617618e+00, + +1.00000000000000000218e+00, + }; + + constexpr float QP[] = { + -1.13663838898469149931e-02, + -1.28252718670509318512e+00, + -1.95539544257735972385e+01, + -9.32060152123768231369e+01, + -1.77681167980488050595e+02, + -1.47077505154951170175e+02, + -5.14105326766599330220e+01, + -6.05014350600728481186e+00, + }; + + constexpr float QQ[] = { + 
+6.43178256118178023184e+01, + +8.56430025976980587198e+02, + +3.88240183605401609683e+03, + +7.24046774195652478189e+03, + +5.93072701187316984827e+03, + +2.06209331660327847417e+03, + +2.42005740240291393179e+02, + }; + + constexpr float RP[] = { + -4.79443220978201773821e+09, + +1.95617491946556577543e+12, + -2.49248344360967716204e+14, + +9.70862251047306323952e+15, + }; + + constexpr float RQ[] = { + +4.99563147152651017219e+02, + +1.73785401676374683123e+05, + +4.84409658339962045305e+07, + +1.11855537045356834862e+10, + +2.11277520115489217587e+12, + +3.10518229857422583814e+14, + +3.18121955943204943306e+16, + +1.71086294081043136091e+18, + }; + + if (x < T(0)) { + x = -x; + } + + if (x <= T(5.0)) { + if (x < T(0.00001)) { + return 1.0 - x * x / 4.0; + } + + float rp = 0.0; + + for (auto index = 0; index <= 3; index++) { + rp = rp * (x * x) + RP[index]; + } + + float rq = 0.0; + + for (auto index = 0; index <= 7; index++) { + rq = rq * (x * x) + RQ[index]; + } + + return (x * x - 5.78318596294678452118e+00) * + (x * x - T(3.04712623436620863991e+01)) * rp / rq; + } + + float pp = 0.0; + + for (auto index = 0; index <= 6; index++) { + pp = pp * (25.0 / (x * x)) + PP[index]; + } + + float pq = 0.0; + + for (auto index = 0; index <= 6; index++) { + pq = pq * (25.0 / (x * x)) + PQ[index]; + } + + float qp = 0.0; + + for (auto index = 0; index <= 7; index++) { + qp = qp * (25.0 / (x * x)) + QP[index]; + } + + float qq = 0.0; + + for (auto index = 0; index <= 6; index++) { + qq = qq * (25.0 / (x * x)) + QQ[index]; + } + + return (pp / pq * + ::metal::precise::cos( + x - T(0.785398163397448309615660845819875721)) - + 5.0 / x * (qp / qq) * + ::metal::precise::sin( + x - 0.785398163397448309615660845819875721)) * + 0.797884560802865355879892119868763737 / ::metal::precise::sqrt(x); +} // bessel_j0_forward(T x) + +template +inline float bessel_y0_forward(T x) { + constexpr float PP[] = { + +7.96936729297347051624e-04, + +8.28352392107440799803e-02, + +1.23953371646414299388e+00, + +5.44725003058768775090e+00, + +8.74716500199817011941e+00, + +5.30324038235394892183e+00, + +9.99999999999999997821e-01, + }; + + constexpr float PQ[] = { + +9.24408810558863637013e-04, + +8.56288474354474431428e-02, + +1.25352743901058953537e+00, + +5.47097740330417105182e+00, + +8.76190883237069594232e+00, + +5.30605288235394617618e+00, + +1.00000000000000000218e+00, + }; + + constexpr float QP[] = { + -1.13663838898469149931e-02, + -1.28252718670509318512e+00, + -1.95539544257735972385e+01, + -9.32060152123768231369e+01, + -1.77681167980488050595e+02, + -1.47077505154951170175e+02, + -5.14105326766599330220e+01, + -6.05014350600728481186e+00, + }; + + constexpr float QQ[] = { + +6.43178256118178023184e+01, + +8.56430025976980587198e+02, + +3.88240183605401609683e+03, + +7.24046774195652478189e+03, + +5.93072701187316984827e+03, + +2.06209331660327847417e+03, + +2.42005740240291393179e+02, + }; + + constexpr float YP[] = { + +1.55924367855235737965e+04, + -1.46639295903971606143e+07, + +5.43526477051876500413e+09, + -9.82136065717911466409e+11, + +8.75906394395366999549e+13, + -3.46628303384729719441e+15, + +4.42733268572569800351e+16, + -1.84950800436986690637e+16, + }; + + constexpr float YQ[] = { + +1.04128353664259848412e+03, + +6.26107330137134956842e+05, + +2.68919633393814121987e+08, + +8.64002487103935000337e+10, + +2.02979612750105546709e+13, + +3.17157752842975028269e+15, + +2.50596256172653059228e+17, + }; + + if (x <= T(5.0)) { + if (x == T(0.0)) { + return -INFINITY; + } + + if (x < T(0.0)) { + 
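// In the large-argument branch of bessel_j0_forward above (bessel_y0_forward below follows the
// same pattern), the literal constants are sqrt(2/pi), about 0.797884560802865, and pi/4, about
// 0.785398163397448 (3*pi/4, about 2.356194490192345, plays the same role for the order-1
// functions further down, and 0.636619772... = 2/pi multiplies the log term in Y0). The Cephes
// form being evaluated is J0(x) ~ sqrt(2/(pi*x)) * [P*cos(x - pi/4) - (5/x)*Q*sin(x - pi/4)],
// with P and Q rational polynomials in (5/x)^2; Y0 combines the same pieces with sin and cos
// exchanged. Approximate spot values: J0(1) ~ 0.7651977, Y0(1) ~ 0.0882570, J1(1) ~ 0.4400506.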
return NAN; + } + + float yp = 0.0; + + for (auto index = 0; index <= 7; index++) { + yp = yp * (x * x) + YP[index]; + } + + float yq = 0.0; + + for (auto index = 0; index <= 6; index++) { + yq = yq * (x * x) + YQ[index]; + } + + return yp / yq + + (0.636619772367581343075535053490057448 * ::metal::precise::log(x) * + bessel_j0_forward(x)); + } + + float pp = 0.0; + + for (auto index = 0; index <= 6; index++) { + pp = pp * (25.0 / (x * x)) + PP[index]; + } + + float pq = 0.0; + + for (auto index = 0; index <= 6; index++) { + pq = pq * (25.0 / (x * x)) + PQ[index]; + } + + float qp = 0.0; + + for (auto index = 0; index <= 7; index++) { + qp = qp * (25.0 / (x * x)) + QP[index]; + } + + float qq = 0.0; + + for (auto index = 0; index <= 6; index++) { + qq = qq * (25.0 / (x * x)) + QQ[index]; + } + + return (pp / pq * + ::metal::precise::sin( + x - 0.785398163397448309615660845819875721) + + 5.0 / x * (qp / qq) * + ::metal::precise::cos( + x - 0.785398163397448309615660845819875721)) * + 0.797884560802865355879892119868763737 / ::metal::precise::sqrt(x); +} // bessel_y0_forward(T x) + +template +inline float bessel_j1_forward(T x) { + constexpr float PP[] = { + +7.62125616208173112003e-04, + +7.31397056940917570436e-02, + +1.12719608129684925192e+00, + +5.11207951146807644818e+00, + +8.42404590141772420927e+00, + +5.21451598682361504063e+00, + +1.00000000000000000254e+00, + }; + + constexpr float PQ[] = { + +5.71323128072548699714e-04, + +6.88455908754495404082e-02, + +1.10514232634061696926e+00, + +5.07386386128601488557e+00, + +8.39985554327604159757e+00, + +5.20982848682361821619e+00, + +9.99999999999999997461e-01, + }; + + constexpr float QP[] = { + +5.10862594750176621635e-02, + +4.98213872951233449420e+00, + +7.58238284132545283818e+01, + +3.66779609360150777800e+02, + +7.10856304998926107277e+02, + +5.97489612400613639965e+02, + +2.11688757100572135698e+02, + +2.52070205858023719784e+01, + }; + + constexpr float QQ[] = { + +7.42373277035675149943e+01, + +1.05644886038262816351e+03, + +4.98641058337653607651e+03, + +9.56231892404756170795e+03, + +7.99704160447350683650e+03, + +2.82619278517639096600e+03, + +3.36093607810698293419e+02, + }; + + constexpr float RP[] = { + -8.99971225705559398224e+08, + +4.52228297998194034323e+11, + -7.27494245221818276015e+13, + +3.68295732863852883286e+15, + }; + + constexpr float RQ[] = { + +6.20836478118054335476e+02, + +2.56987256757748830383e+05, + +8.35146791431949253037e+07, + +2.21511595479792499675e+10, + +4.74914122079991414898e+12, + +7.84369607876235854894e+14, + +8.95222336184627338078e+16, + +5.32278620332680085395e+18, + }; + + if (x < T(0.0)) { + return -bessel_j1_forward(-x); + } + + if (x <= T(5.0)) { + float rp = 0.0; + + for (auto index = 0; index <= 3; index++) { + rp = rp * (x * x) + RP[index]; + } + + float rq = 0.0; + + for (auto index = 0; index <= 7; index++) { + rq = rq * (x * x) + RQ[index]; + } + + return rp / rq * x * (x * x - 1.46819706421238932572e+01) * + (x * x - 4.92184563216946036703e+01); + } + + float pp = 0.0; + + for (auto index = 0; index <= 6; index++) { + pp = pp * (5.0 / x * (5.0 / x)) + PP[index]; + } + + float pq = 0.0; + + for (auto index = 0; index <= 6; index++) { + pq = pq * (5.0 / x * (5.0 / x)) + PQ[index]; + } + + float qp = 0.0; + + for (auto index = 0; index <= 7; index++) { + qp = qp * (5.0 / x * (5.0 / x)) + QP[index]; + } + + float qq = 0.0; + + for (auto index = 0; index <= 6; index++) { + qq = qq * (5.0 / x * (5.0 / x)) + QQ[index]; + } + + return (pp / pq * + ::metal::precise::cos( + x - 
2.356194490192344928846982537459627163) - + 5.0 / x * (qp / qq) * + ::metal::precise::sin( + x - 2.356194490192344928846982537459627163)) * + 0.797884560802865355879892119868763737 / ::metal::precise::sqrt(x); +} // bessel_j1_forward(T x) + +template +inline float bessel_y1_forward(T x) { + constexpr float PP[] = { + +7.62125616208173112003e-04, + +7.31397056940917570436e-02, + +1.12719608129684925192e+00, + +5.11207951146807644818e+00, + +8.42404590141772420927e+00, + +5.21451598682361504063e+00, + +1.00000000000000000254e+00, + }; + + constexpr float PQ[] = { + +5.71323128072548699714e-04, + +6.88455908754495404082e-02, + +1.10514232634061696926e+00, + +5.07386386128601488557e+00, + +8.39985554327604159757e+00, + +5.20982848682361821619e+00, + +9.99999999999999997461e-01, + }; + + constexpr float QP[] = { + +5.10862594750176621635e-02, + +4.98213872951233449420e+00, + +7.58238284132545283818e+01, + +3.66779609360150777800e+02, + +7.10856304998926107277e+02, + +5.97489612400613639965e+02, + +2.11688757100572135698e+02, + +2.52070205858023719784e+01, + }; + + constexpr float QQ[] = { + +7.42373277035675149943e+01, + +1.05644886038262816351e+03, + +4.98641058337653607651e+03, + +9.56231892404756170795e+03, + +7.99704160447350683650e+03, + +2.82619278517639096600e+03, + +3.36093607810698293419e+02, + }; + + constexpr float YP[] = { + +1.26320474790178026440e+09, + -6.47355876379160291031e+11, + +1.14509511541823727583e+14, + -8.12770255501325109621e+15, + +2.02439475713594898196e+17, + -7.78877196265950026825e+17, + }; + + constexpr float YQ[] = { + +5.94301592346128195359e+02, + +2.35564092943068577943e+05, + +7.34811944459721705660e+07, + +1.87601316108706159478e+10, + +3.88231277496238566008e+12, + +6.20557727146953693363e+14, + +6.87141087355300489866e+16, + +3.97270608116560655612e+18, + }; + + if (x <= T(5.0)) { + if (x == T(0.0)) { + return -INFINITY; + } + + if (x <= T(0.0)) { + return NAN; + } + + float yp = 0.0; + + for (auto index = 0; index <= 5; index++) { + yp = yp * (x * x) + YP[index]; + } + + float yq = 0.0; + + for (auto index = 0; index <= 7; index++) { + yq = yq * (x * x) + YQ[index]; + } + + return x * (yp / yq) + + (0.636619772367581343075535053490057448 * + (bessel_j1_forward(x) * ::metal::precise::log(x) - 1.0 / x)); + } + + float pp = 0.0; + + for (auto index = 0; index <= 6; index++) { + pp = pp * (5.0 / x * (5.0 / x)) + PP[index]; + } + + float pq = 0.0; + + for (auto index = 0; index <= 6; index++) { + pq = pq * (5.0 / x * (5.0 / x)) + PQ[index]; + } + + float qp = 0.0; + + for (auto index = 0; index <= 7; index++) { + qp = qp * (5.0 / x * (5.0 / x)) + QP[index]; + } + + float qq = 0.0; + + for (auto index = 0; index <= 6; index++) { + qq = qq * (5.0 / x * (5.0 / x)) + QQ[index]; + } + + return (pp / pq * + ::metal::precise::sin( + x - 2.356194490192344928846982537459627163) + + 5.0 / x * (qp / qq) * + ::metal::precise::cos( + x - 2.356194490192344928846982537459627163)) * + 0.797884560802865355879892119868763737 / ::metal::precise::sqrt(x); +} // bessel_y1_forward(T x) + +template +inline float modified_bessel_i0_forward(T x) { + constexpr float A[] = { + -4.41534164647933937950e-18, +3.33079451882223809783e-17, + -2.43127984654795469359e-16, +1.71539128555513303061e-15, + -1.16853328779934516808e-14, +7.67618549860493561688e-14, + -4.85644678311192946090e-13, +2.95505266312963983461e-12, + -1.72682629144155570723e-11, +9.67580903537323691224e-11, + -5.18979560163526290666e-10, +2.65982372468238665035e-09, + -1.30002500998624804212e-08, 
+6.04699502254191894932e-08, + -2.67079385394061173391e-07, +1.11738753912010371815e-06, + -4.41673835845875056359e-06, +1.64484480707288970893e-05, + -5.75419501008210370398e-05, +1.88502885095841655729e-04, + -5.76375574538582365885e-04, +1.63947561694133579842e-03, + -4.32430999505057594430e-03, +1.05464603945949983183e-02, + -2.37374148058994688156e-02, +4.93052842396707084878e-02, + -9.49010970480476444210e-02, +1.71620901522208775349e-01, + -3.04682672343198398683e-01, +6.76795274409476084995e-01, + }; + + constexpr float B[] = { + -7.23318048787475395456e-18, -4.83050448594418207126e-18, + +4.46562142029675999901e-17, +3.46122286769746109310e-17, + -2.82762398051658348494e-16, -3.42548561967721913462e-16, + +1.77256013305652638360e-15, +3.81168066935262242075e-15, + -9.55484669882830764870e-15, -4.15056934728722208663e-14, + +1.54008621752140982691e-14, +3.85277838274214270114e-13, + +7.18012445138366623367e-13, -1.79417853150680611778e-12, + -1.32158118404477131188e-11, -3.14991652796324136454e-11, + +1.18891471078464383424e-11, +4.94060238822496958910e-10, + +3.39623202570838634515e-09, +2.26666899049817806459e-08, + +2.04891858946906374183e-07, +2.89137052083475648297e-06, + +6.88975834691682398426e-05, +3.36911647825569408990e-03, + +8.04490411014108831608e-01, + }; + + float p; + float q = 0.0; + + if (::metal::fabs(x) <= 8.0) { + float a = A[0]; + + for (uint8_t index = 1; index < 30; index++) { + p = q; + q = a; + a = (.5 * ::metal::fabs(x) - 2.0) * q - p + A[index]; + } + + return ::metal::exp(::metal::fabs(x)) * (T(0.5) * (a - p)); + } + + float b = B[0]; + + for (uint8_t index = 1; index < 25; index++) { + p = q; + q = b; + b = (32.0 / ::metal::fabs(x) - 2.0) * q - p + B[index]; + } + + return ::metal::exp(::metal::fabs(x)) * (.5 * (b - p)) / + ::metal::precise::sqrt(::metal::fabs(x)); +} // modified_bessel_i0_forward(T x) + +template +inline float modified_bessel_i1_forward(T x) { + constexpr float A[] = { + +2.77791411276104639959e-18, -2.11142121435816608115e-17, + +1.55363195773620046921e-16, -1.10559694773538630805e-15, + +7.60068429473540693410e-15, -5.04218550472791168711e-14, + +3.22379336594557470981e-13, -1.98397439776494371520e-12, + +1.17361862988909016308e-11, -6.66348972350202774223e-11, + +3.62559028155211703701e-10, -1.88724975172282928790e-09, + +9.38153738649577178388e-09, -4.44505912879632808065e-08, + +2.00329475355213526229e-07, -8.56872026469545474066e-07, + +3.47025130813767847674e-06, -1.32731636560394358279e-05, + +4.78156510755005422638e-05, -1.61760815825896745588e-04, + +5.12285956168575772895e-04, -1.51357245063125314899e-03, + +4.15642294431288815669e-03, -1.05640848946261981558e-02, + +2.47264490306265168283e-02, -5.29459812080949914269e-02, + +1.02643658689847095384e-01, -1.76416518357834055153e-01, + +2.52587186443633654823e-01, + }; + + constexpr float B[] = { + +7.51729631084210481353e-18, +4.41434832307170791151e-18, + -4.65030536848935832153e-17, -3.20952592199342395980e-17, + +2.96262899764595013876e-16, +3.30820231092092828324e-16, + -1.88035477551078244854e-15, -3.81440307243700780478e-15, + +1.04202769841288027642e-14, +4.27244001671195135429e-14, + -2.10154184277266431302e-14, -4.08355111109219731823e-13, + -7.19855177624590851209e-13, +2.03562854414708950722e-12, + +1.41258074366137813316e-11, +3.25260358301548823856e-11, + -1.89749581235054123450e-11, -5.58974346219658380687e-10, + -3.83538038596423702205e-09, -2.63146884688951950684e-08, + -2.51223623787020892529e-07, -3.88256480887769039346e-06, + -1.10588938762623716291e-04, 
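// modified_bessel_i0_forward above, like the i1/k0/k1 variants that follow, unrolls the same
// Chebyshev recurrence as chbevl (the K-variants use their own argument mappings); here the
// series argument is 0.5*|x| - 2 for |x| <= 8 and 32/|x| - 2 otherwise. Approximate spot values
// for checking a port: I0(1) ~ 1.2660659, I1(1) ~ 0.5651591, K0(1) ~ 0.4210244, K1(1) ~ 0.6019072.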
-9.76109749136146840777e-03, + +7.78576235018280120474e-01, + }; + + float p; + float q = 0.0; + + if (::metal::fabs(x) <= T(8.0)) { + float a = A[0]; + + for (uint8_t index = 1; index < 29; index++) { + p = q; + q = a; + a = (.5 * ::metal::fabs(x) - 2.0) * q - p + A[index]; + } + + return .5 * (a - p) * x * ::metal::precise::exp(::metal::fabs(x)); + } + + float b = B[0]; + + for (uint8_t index = 1; index < 25; index++) { + p = q; + q = b; + b = (32.0 / ::metal::fabs(x) - 2.0) * q - p + B[index]; + } + + if (x < 0.0) { + return -( + ::metal::precise::exp(::metal::fabs(x)) * (0.5 * (b - p)) / + ::metal::precise::sqrt(::metal::fabs(x))); + } + + return ::metal::precise::exp(::metal::fabs(x)) * (0.5 * (b - p)) / + ::metal::precise::sqrt(::metal::fabs(x)); +} // modified_bessel_i1_forward(T x) + +template +inline float modified_bessel_k0_forward(T x) { + constexpr float A[] = { + +1.37446543561352307156e-16, + +4.25981614279661018399e-14, + +1.03496952576338420167e-11, + +1.90451637722020886025e-09, + +2.53479107902614945675e-07, + +2.28621210311945178607e-05, + +1.26461541144692592338e-03, + +3.59799365153615016266e-02, + +3.44289899924628486886e-01, + -5.35327393233902768720e-01, + }; + + constexpr float B[] = { + +5.30043377268626276149e-18, -1.64758043015242134646e-17, + +5.21039150503902756861e-17, -1.67823109680541210385e-16, + +5.51205597852431940784e-16, -1.84859337734377901440e-15, + +6.34007647740507060557e-15, -2.22751332699166985548e-14, + +8.03289077536357521100e-14, -2.98009692317273043925e-13, + +1.14034058820847496303e-12, -4.51459788337394416547e-12, + +1.85594911495471785253e-11, -7.95748924447710747776e-11, + +3.57739728140030116597e-10, -1.69753450938905987466e-09, + +8.57403401741422608519e-09, -4.66048989768794782956e-08, + +2.76681363944501510342e-07, -1.83175552271911948767e-06, + +1.39498137188764993662e-05, -1.28495495816278026384e-04, + +1.56988388573005337491e-03, -3.14481013119645005427e-02, + +2.44030308206595545468e+00, + }; + + if (x == 0.0) { + return INFINITY; + } + + if (x < 0.0) { + return NAN; + } + + float p; + float q = 0.0; + + if (x <= 2.0) { + float a = A[0]; + + for (uint8_t index = 1; index < 10; index++) { + p = q; + q = a; + a = (x * x - 2.0) * q - p + A[index]; + } + + return 0.5 * (a - p) - + ::metal::log(0.5 * x) * modified_bessel_i0_forward(x); + } + + float b = B[0]; + + for (uint8_t index = 1; index < 25; index++) { + p = q; + q = b; + b = (8.0 / x - 2.0) * q - p + B[index]; + } + + return ::metal::exp(-x) * (0.5 * (b - p)) / ::metal::sqrt(x); +} // modified_bessel_k0_forward(T x) + +template +inline float modified_bessel_k1_forward(T x) { + constexpr float A[] = { + -7.02386347938628759343e-18, + -2.42744985051936593393e-15, + -6.66690169419932900609e-13, + -1.41148839263352776110e-10, + -2.21338763073472585583e-08, + -2.43340614156596823496e-06, + -1.73028895751305206302e-04, + -6.97572385963986435018e-03, + -1.22611180822657148235e-01, + -3.53155960776544875667e-01, + +1.52530022733894777053e+00, + }; + + constexpr float B[] = { + -5.75674448366501715755e-18, +1.79405087314755922667e-17, + -5.68946255844285935196e-17, +1.83809354436663880070e-16, + -6.05704724837331885336e-16, +2.03870316562433424052e-15, + -7.01983709041831346144e-15, +2.47715442448130437068e-14, + -8.97670518232499435011e-14, +3.34841966607842919884e-13, + -1.28917396095102890680e-12, +5.13963967348173025100e-12, + -2.12996783842756842877e-11, +9.21831518760500529508e-11, + -4.19035475934189648750e-10, +2.01504975519703286596e-09, + -1.03457624656780970260e-08, 
+5.74108412545004946722e-08, + -3.50196060308781257119e-07, +2.40648494783721712015e-06, + -1.93619797416608296024e-05, +1.95215518471351631108e-04, + -2.85781685962277938680e-03, +1.03923736576817238437e-01, + +2.72062619048444266945e+00, + }; + + if (x == 0.0) { + return INFINITY; + } + + if (x < 0.0) { + return NAN; + } + + float p; + float q = 0.0; + + if (x <= 2.0) { + float a = A[0]; + + for (uint8_t index = 1; index < 11; index++) { + p = q; + q = a; + a = (x * x - T(2.0)) * q - p + A[index]; + } + + return ::metal::precise::log(T(0.5) * x) * modified_bessel_i1_forward(x) + + 0.5 * (a - p) / x; + } + + float b = B[0]; + + for (uint8_t index = 1; index < 25; index++) { + p = q; + q = b; + b = (8.0 / x - 2.0) * q - p + B[index]; + } + + return ::metal::precise::exp(-x) * (0.5 * (b - p)) / + ::metal::precise::sqrt(x); +} + +template +inline float scaled_modified_bessel_k0_forward(T x) { + constexpr float A[] = { + +1.37446543561352307156e-16, + +4.25981614279661018399e-14, + +1.03496952576338420167e-11, + +1.90451637722020886025e-09, + +2.53479107902614945675e-07, + +2.28621210311945178607e-05, + +1.26461541144692592338e-03, + +3.59799365153615016266e-02, + +3.44289899924628486886e-01, + -5.35327393233902768720e-01, + }; + + constexpr float B[] = { + +5.30043377268626276149e-18, -1.64758043015242134646e-17, + +5.21039150503902756861e-17, -1.67823109680541210385e-16, + +5.51205597852431940784e-16, -1.84859337734377901440e-15, + +6.34007647740507060557e-15, -2.22751332699166985548e-14, + +8.03289077536357521100e-14, -2.98009692317273043925e-13, + +1.14034058820847496303e-12, -4.51459788337394416547e-12, + +1.85594911495471785253e-11, -7.95748924447710747776e-11, + +3.57739728140030116597e-10, -1.69753450938905987466e-09, + +8.57403401741422608519e-09, -4.66048989768794782956e-08, + +2.76681363944501510342e-07, -1.83175552271911948767e-06, + +1.39498137188764993662e-05, -1.28495495816278026384e-04, + +1.56988388573005337491e-03, -3.14481013119645005427e-02, + +2.44030308206595545468e+00, + }; + + if (x == 0.0) { + return INFINITY; + } + + if (x < 0.0) { + return NAN; + } + + float p; + float q = 0.0; + + if (x <= 2.0) { + float a = A[0]; + + for (uint8_t index = 1; index < 10; index++) { + p = q; + q = a; + a = (x * x - T(2.0)) * q - p + A[index]; + } + + return (0.5 * (a - p) - + ::metal::precise::log(0.5 * x) * modified_bessel_i0_forward(x)) * + ::metal::precise::exp(x); + } + + float b = B[0]; + + for (uint8_t index = 1; index < 25; index++) { + p = q; + q = b; + b = (8.0 / x - 2.0) * q - p + B[index]; + } + + return 0.5 * (b - p) / ::metal::precise::sqrt(x); +} + +template +inline float scaled_modified_bessel_k1_forward(T x) { + constexpr float A[] = { + -7.02386347938628759343e-18, + -2.42744985051936593393e-15, + -6.66690169419932900609e-13, + -1.41148839263352776110e-10, + -2.21338763073472585583e-08, + -2.43340614156596823496e-06, + -1.73028895751305206302e-04, + -6.97572385963986435018e-03, + -1.22611180822657148235e-01, + -3.53155960776544875667e-01, + +1.52530022733894777053e+00, + }; + + constexpr float B[] = { + -5.75674448366501715755e-18, +1.79405087314755922667e-17, + -5.68946255844285935196e-17, +1.83809354436663880070e-16, + -6.05704724837331885336e-16, +2.03870316562433424052e-15, + -7.01983709041831346144e-15, +2.47715442448130437068e-14, + -8.97670518232499435011e-14, +3.34841966607842919884e-13, + -1.28917396095102890680e-12, +5.13963967348173025100e-12, + -2.12996783842756842877e-11, +9.21831518760500529508e-11, + -4.19035475934189648750e-10, 
+2.01504975519703286596e-09, + -1.03457624656780970260e-08, +5.74108412545004946722e-08, + -3.50196060308781257119e-07, +2.40648494783721712015e-06, + -1.93619797416608296024e-05, +1.95215518471351631108e-04, + -2.85781685962277938680e-03, +1.03923736576817238437e-01, + +2.72062619048444266945e+00, + }; + + if (x == 0.0) { + return INFINITY; + } + + if (x < 0.0) { + return NAN; + } + + float p; + float q = 0.0; + + if (x <= 2.0) { + float a = A[0]; + + for (uint8_t index = 1; index < 11; index++) { + p = q; + q = a; + a = (x * x - 2.0) * q - p + A[index]; + } + + return (::metal::precise::log(0.5 * x) * modified_bessel_i1_forward(x) + + 0.5 * (a - p) / x) * + ::metal::precise::exp(x); + } + + float b = B[0]; + + for (uint8_t index = 1; index < 25; index++) { + p = q; + q = b; + b = (8.0 / x - 2.0) * q - p + B[index]; + } + + return (0.5 * (b - p) / ::metal::precise::sqrt(x)); +} + +template +float chebyshev_polynomial_t_forward(T x, int64_t n) { + if (n < 0) { + return 0.0; + } + + if (::metal::fabs(x) == 1.0) { + if (x > 0.0 || n % 2 == 0) { + return 1.0; + } + + return -1.0; + } + + if ((n > 6) && (::metal::precise::fabs(x) < 1.0)) { + return ::metal::precise::cos(n * ::metal::precise::acos(x)); + } + + if (n == 0) { + return 1.0; + } + + if (n == 1) { + return x; + } + + float p = 1.0; + float q = x; + float r; + + for (int64_t k = 2; k <= n; k++) { + r = (x + x) * q - p; + p = q; + q = r; + } + return r; +} + +template +float chebyshev_polynomial_u_forward(T x, int64_t n) { + if (n < 0) { + return 0.0; + } + + if (::metal::fabs(x) == 1.0) { + if (x > 0.0 || n % 2 == 0) { + return n + 1; + } + + return -(n + 1); + } + + if ((n > 8) && (::metal::fabs(x) < 1.0)) { + const auto acos_x = ::metal::precise::acos(x); + if (::metal::precise::sin(acos_x) != 0.0) { + return ::metal::precise::sin((n + 1) * acos_x) / + ::metal::precise::sin(acos_x); + } + + return (n + 1) * ::metal::precise::cos((n + 1) * acos_x) / x; + } + + if (n == 0) { + return 1.0; + } + + auto q = 2.0 * x; + if (n == 1) { + return q; + } + + auto p = 1.0; + float r; + + for (int64_t k = 2; k <= n; k++) { + r = 2 * x * q - p; + p = q; + q = r; + } + + return r; +} + +template +float chebyshev_polynomial_v_forward(T x, int64_t n) { + if (n < 0) { + return 0.0; + } + + if (::metal::fabs(x) == 1.0) { + if (x > 0.0) { + return 1.0; + } + + if (n % 2 == 0) { + return n + n + 1; + } + + return -(n + n + 1); + } + + if ((n > 8) && (::metal::fabs(x) < 1.0)) { + const auto acos_x = ::metal::precise::acos(x); + if (::metal::precise::sin(.5 * acos_x) != 1.0) { + return ::metal::precise::cos((n + 0.5) * acos_x) / + ::metal::precise::cos(.5 * acos_x); + } + + if (n % 2 == 0) { + return n + n + 1; + } + + return -(n + n + 1); + } + + if (n == 0) { + return 1.0; + } + + auto q = 2.0 * x - 1.0; + if (n == 1) { + return q; + } + + auto p = 1.0; + float r; + + for (int64_t k = 2; k <= n; k++) { + r = 2 * x * q - p; + p = q; + q = r; + } + + return r; +} // chebyshev_polynomial_v_forward(T x, int64_t n) + +template +float chebyshev_polynomial_w_forward(T x, int64_t n) { + if (n < 0) { + return 0.0; + } + + if (::metal::fabs(x) == 1.0) { + if (x > 0.0) { + return n + n + 1; + } + + if (n % 2 == 0) { + return 1.0; + } + + return -1.0; + } + + if ((n > 8) && (::metal::fabs(x) < 1.0)) { + const auto acos_x = ::metal::precise::acos(x); + if (::metal::precise::cos(.5 * acos_x) != 1.0) { + return ::metal::precise::sin((n + 0.5) * acos_x) / + ::metal::precise::sin(.5 * acos_x); + } + + if (x > 0.0) { + return n + n + 1; + } + + if (n % 2 == 0) { + 
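// chebyshev_polynomial_t/u/v_forward above, and the w variant continuing below, share the
// three-term recurrence p_{k+1}(x) = 2x*p_k(x) - p_{k-1}(x); they differ only in the seeds
// (T: 1, x; U: 1, 2x; V: 1, 2x - 1; W: 1, 2x + 1) and in the trigonometric closed forms used
// for large n with |x| < 1. Worked check for chebyshev_polynomial_t_forward:
// T_3(0.5) = 4*(0.5)^3 - 3*(0.5) = -1, consistent with the identity T_n(x) = cos(n*acos(x))
// used in the n > 6 branch, since cos(3*acos(0.5)) = cos(pi) = -1.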
return 1.0; + } + + return -1.0; + } + + if (n == 0) { + return 1.0; + } + + auto q = 2.0 * x + 1.0; + if (n == 1) { + return q; + } + + auto p = 1.0; + float r; + + for (int64_t k = 2; k <= n; k++) { + r = 2.0 * x * q - p; + p = q; + q = r; + } + + return r; +} // chebyshev_polynomial_w_forward(T x, int64_t n) + +template +// TODO: Add 512 if/when double will be supported in Metal +inline constexpr int getHermitianLimit() { + return 128; +} + +template +inline float hermite_polynomial_h_forward(T x, int64_t n) { + if (n < 0) { + return 0.0; + } + + if (n == 0) { + return 1.0; + } + + if (n == 1) { + return x + x; + } + + if (n > getHermitianLimit()) { + return NAN; + } + + float p = 1.0; + float q = x + x; + float r = 0.0; + + for (int64_t k = 2; k < n + n; k += 2) { + r = (x + x) * q - k * p; + p = q; + q = r; + } + + return r; +} // hermite_polynomial_h_forward(T x, int64_t n) + +template +inline float hermite_polynomial_he_forward(T x, int64_t n) { + if (n < 0) { + return 0.0; + } + + if (n == 0) { + return 1.0; + } + + if (n == 1) { + return x; + } + + if (n > getHermitianLimit()) { + return NAN; + } + + float p = 1.0; + float q = x; + float r; + + for (int64_t k = 1; k < n; k++) { + r = x * q - k * p; + p = q; + q = r; + } + + return r; +} // hermite_polynomial_he_forward(T x, int64_t n) + +} // namespace metal +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/metal/utils.h b/phivenv/Lib/site-packages/torch/include/c10/metal/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..f87f48f8b0aa563df6d8dd072b92c139ea5edc0e --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/metal/utils.h @@ -0,0 +1,334 @@ +// Metal helper functions +#pragma once +#include +#include + +namespace c10 { +namespace metal { + +namespace detail { +template +struct vectypes {}; + +template <> +struct vectypes { + using type4 = float4; + using type3 = float3; + using type2 = float2; +}; + +template <> +struct vectypes { + using type4 = half4; + using type3 = half3; + using type2 = half2; +}; + +#if __METAL_VERSION__ >= 310 +template <> +struct vectypes { + using type4 = bfloat4; + using type3 = bfloat3; + using type2 = bfloat2; +}; +#endif + +template <> +struct vectypes { + using type4 = short4; + using type3 = short3; + using type2 = short2; +}; + +template <> +struct vectypes { + using type4 = int4; + using type3 = int3; + using type2 = int2; +}; + +template <> +struct vectypes { + using type4 = short4; + using type3 = short3; + using type2 = short2; +}; + +template +struct OpMathType { + using type = T; +}; + +template <> +struct OpMathType { + using type = float; +}; + +template <> +struct OpMathType { + using type = int; +}; + +template <> +struct OpMathType { + using type = int; +}; + +template <> +struct OpMathType { + using type = int; +}; + +#if __METAL_VERSION__ >= 310 +template <> +struct OpMathType { + using type = float; +}; +#endif + +// Type promotion structure for higher precision accumulation +template +struct AccumulationType { + using type = T; +}; + +// Specialization for half - promote to float for accumulation +template <> +struct AccumulationType { + using type = float; +}; + +#if __METAL_VERSION__ >= 310 +// Specialization for bfloat - promote to float for accumulation +template <> +struct AccumulationType { + using type = float; +}; +#endif + +} // namespace detail + +template +::metal::enable_if_t<::metal::is_floating_point_v, T> max(T a, T b) { + return ::metal::isunordered(a, b) ? 
NAN : ::metal::max(a, b); +} + +template +::metal::enable_if_t<::metal::is_integral_v&& ::metal::is_integral_v, T> +max(T a, U b) { + return ::metal::max(a, static_cast(b)); +} + +template +::metal::enable_if_t<::metal::is_floating_point_v, T> min(T a, T b) { + return ::metal::isunordered(a, b) ? NAN : ::metal::min(a, b); +} + +template +::metal::enable_if_t<::metal::is_integral_v&& ::metal::is_integral_v, T> +min(T a, U b) { + return ::metal::min(a, static_cast(b)); +} + +#if __METAL_VERSION__ >= 310 +template <> +inline bfloat min(bfloat a, bfloat b) { + return bfloat( + ::metal::isunordered(a, b) ? NAN : ::metal::min(float(a), float(b))); +} + +template <> +inline bfloat max(bfloat a, bfloat b) { + return bfloat( + ::metal::isunordered(a, b) ? NAN : ::metal::max(float(a), float(b))); +} +#endif + +template +using vec2type_t = typename detail::vectypes::type2; + +template +using vec4type_t = typename detail::vectypes::type4; + +template +using opmath_t = typename detail::OpMathType::type; + +template +using accum_t = typename detail::AccumulationType::type; + +// TODO: Move it to type_traits header may be +template +using result_of = decltype(::metal::declval()(::metal::declval()...)); + +template +constexpr constant bool is_complex_v = + ::metal::is_same_v || ::metal::is_same_v; + +template +constexpr constant bool is_scalar_floating_point_v = + ::metal::is_floating_point_v && ::metal::is_scalar_v; + +template +constexpr constant bool is_scalar_integral_v = + ::metal::is_integral_v && ::metal::is_scalar_v; + +template +using common_dtype = decltype(U(0) + V(0)); + +// floor_divide +template < + typename T, + typename U, + ::metal::enable_if_t< + is_scalar_integral_v && is_scalar_integral_v, + bool> = true> +inline common_dtype floor_divide(T x, U y) { + const auto quot = x / y; + return (x < 0) == (y < 0) ? quot : (x % y != 0) ? 
quot - 1 : quot; +} + +template < + typename T, + typename U, + ::metal::enable_if_t< + is_scalar_floating_point_v && is_scalar_floating_point_v, + bool> = true> +inline common_dtype floor_divide(T x, U y) { + return ::metal::floor(x / y); +} + +// fmod +template < + typename T, + typename U, + ::metal::enable_if_t< + is_scalar_integral_v && is_scalar_integral_v, + bool> = true> +inline common_dtype fmod(T x, U y) { + return x % y; +} + +template < + typename T, + typename U, + ::metal::enable_if_t< + is_scalar_floating_point_v && is_scalar_floating_point_v, + bool> = true> +inline common_dtype fmod(T x, U y) { + return ::metal::fmod(x, y); +} + +// cast_to primitives +// - No-op if types as the same +template < + typename T, + typename U, + ::metal::enable_if_t<::metal::is_same_v, bool> = true> +inline T cast_to(const U from) { + return from; +} +// - Simple cast between scalar and complex dtypes +template < + typename T, + typename U, + ::metal::enable_if_t< + !::metal::is_same_v && (is_complex_v == is_complex_v), + bool> = true> +inline T cast_to(const U from) { + return static_cast(from); +} + +// - Scalar to complex +template < + typename T, + typename U, + ::metal::enable_if_t && !is_complex_v, bool> = true> +inline T cast_to(const U from) { + return T(float(from), 0.0); +} +// - Complex to scalar (should not really be used, but exists for compliteness) +template < + typename T, + typename U, + ::metal::enable_if_t && is_complex_v, bool> = true> +inline T cast_to(const U from) { + return static_cast(from.x); +} + +// Generalizable math operators (used for both scalar and complex) + +template < + typename T, + typename U, + ::metal::enable_if_t, bool> = true> +inline common_dtype mul(const T x, const U y) { + return x * y; +} + +template < + typename T, + typename U, + ::metal::enable_if_t && is_complex_v, bool> = true> +inline common_dtype mul(const T x, const U y) { + return T(x.x * y.x - x.y * y.y, x.x * y.y + x.y * y.x); +} + +template < + typename T, + typename U, + ::metal::enable_if_t, bool> = true> +inline common_dtype div(const T x, const U y) { + return x / y; +} + +template < + typename T, + typename U, + ::metal::enable_if_t && is_complex_v, bool> = true> +inline common_dtype div(const T x, const U y) { + return T(::metal::dot(x, y), x.y * y.x - x.x * y.y) / ::metal::dot(y, y); +} + +// Remainder operator +template < + typename T, + typename U, + ::metal::enable_if_t< + is_scalar_floating_point_v || is_scalar_floating_point_v, + bool> = true> +inline float remainder(const T x, const U y) { + const auto x_f = static_cast(x); + const auto y_f = static_cast(y); + return x_f - y_f * floor_divide(x_f, y_f); +} + +template < + typename T, + typename U, + ::metal::enable_if_t< + is_scalar_integral_v && is_scalar_integral_v, + bool> = true> +inline common_dtype remainder(const T x, const U y) { + auto rc = x % y; + return rc == 0 || (x ^ y) > 0 ? 
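// The arithmetic helpers in this header follow Python/PyTorch rounding semantics rather than
// plain C++ behavior; illustrative worked values: floor_divide(-7, 2) == -4 (truncating division
// would give -3), remainder(-7, 2) == 1 (result takes the sign of the divisor), while
// fmod(-7, 2) == -1 (sign of the dividend). Similarly, the min/max overloads above propagate NaN
// via isunordered, matching torch.minimum/torch.maximum, rather than dropping it the way
// fmin/fmax-style functions may.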
rc : rc + y; +} + +// Based on algorithm described in +// https://docs.oracle.com/cd/E19957-01/806-3568/ncg_goldberg.html#1202 +inline float log1p(float x) { + const auto xp1 = 1.0f + x; + // First two elements of Taylor series for log(1+x) in Horner's form are: + // log(1+x) = x * (1 - x * (.5 ...)), but if 1 + x == x, then it's just x + if (xp1 == 1.0f) { + return x; + } + auto rc = ::metal::precise::log(xp1); + if (x > -.5 && x < .5) { + // Order of operations is important here for higher precision + rc *= x / (xp1 - 1.0f); + } + return rc; +} + +} // namespace metal +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/mobile/CPUCachingAllocator.h b/phivenv/Lib/site-packages/torch/include/c10/mobile/CPUCachingAllocator.h new file mode 100644 index 0000000000000000000000000000000000000000..c9b5977013c31499be7c400ef0c6507c1127ae40 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/mobile/CPUCachingAllocator.h @@ -0,0 +1,106 @@ +#pragma once + +#include +#include + +#include +#include +#include + +/* + * CPUCachingAllocator: + * DISCLAIMER: + * This is subject to change (beta) and only supported on mobile builds. + * If code snippet such as in 'Usage pattern' is used outside of mobile + * build you will not observe the intended behavior. + * See below for more information. + * Why? + * It has been observed that some mobile platforms, such as pixel 3, return + * memory aggressively to the system. This results in page faults in some + * cases and ends up hurting performance. This caching allocator aims to address + * that. Furthermore it also allows users to specify their own allocator by + * implementing allocate/free virtual interfaces. What are the cons? There are + * some cons that were observed where use of caching allocator led to worse + * performance on some platforms. Reason being that the caching mechanism used + * by this allocator left us worse off compared to the corresponding platform's + * tuned memory allocator. In that case it seemed better to not use this + * allocator. Note there are some ideas to fix this in the works. + * + * Usage: + * Usage pattern: + * Instantiate and own the caching allocator. + * std::unique_ptr caching_allocator = + * std::make_unique(); + * Use caching allocator with a scoped guard at inference time. + * { + * WithCPUCachingAllocatorGuard(caching_allocator.get()); + * ... model.forward(...); + * } + */ + +namespace c10 { + +class C10_API CPUCachingAllocator { + /* + * What it does: + * Caches all the allocations carried out by this allocator. + * Cache key is the size of the allocation. + * If requested size is found in the cache returns the cached pointer. + * What it does not do: + * No speculative allocation for any future allocations. + */ + private: + inline void* allocate_and_cache(const size_t bytes); + void free_cached(); + + protected: + // Invariants. + // 1. If memory is ever allocated via this allocator then + // the pointer will exist in allocation_map_, unless the allocator + // returned the memory to OS via free_cached. + // 1.1. Therefore even when the said memory is "freed" via this + // allocator (and thus cached), it will continue to stay + // in allocation_map_. Furthermore it will also exist in + // available_map_. Thus an allocated memory pointer can be in both + // allocation_map_ and available_map_ simultaneously. + // 2. Memory pointer maybe removed from allocation_map_, when it + // is freed outside of the scope of this allocator, but was allocated + // by this allocator. + // 3. 
Available map only contains that memory which was allocated + // by this allocator and subsequently freed by this allocator. + // As a result of above invariants, allocated memory ptr cannot be in + // available_map_ unless it is in allocation_map_ as well. + ska::flat_hash_map> available_map_; + static ska::flat_hash_map allocation_map_; + // Since allocation_map, which is a global instance, is mutated/read via + // all public APIs we need a global mutex. + static std::mutex mutex_; + + public: + static void record_free(void* ptr); + virtual ~CPUCachingAllocator(); + // Checks the cache to see if allocation of size bytes can be found. + // If so return cached memory, else + // allocates memory, records it for caching and returns. + virtual void* allocate(const size_t bytes); + // Checks if the memory being freed is was marked for allocation by + // an earlier call to allocate. If so cache the allocation. + // Otherwise free. + virtual void free(void* ptr); +}; + +CPUCachingAllocator* GetDefaultCPUCachingAllocator(); + +bool ThreadLocalCachingAllocatorEnabled(); +CPUCachingAllocator* GetThreadLocalCachingAllocator(); + +class C10_API WithCPUCachingAllocatorGuard { + public: + WithCPUCachingAllocatorGuard(CPUCachingAllocator* allocator); + ~WithCPUCachingAllocatorGuard(); + + private: + CPUCachingAllocator* prev_caching_allocator_ptr_{nullptr}; +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/mobile/CPUProfilingAllocator.h b/phivenv/Lib/site-packages/torch/include/c10/mobile/CPUProfilingAllocator.h new file mode 100644 index 0000000000000000000000000000000000000000..0c8c4830f51dfea31db106b3bea6841084a7ac63 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/mobile/CPUProfilingAllocator.h @@ -0,0 +1,152 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace c10 { + +/* + * Given a sequence of allocations in a thread, AllocationPlan records + * 1. size of each allocation + * 2. Lifetime of each allocation. + * 3. allocation offsets: Memory offset for each allocation in a single blob of + * memory + * 4. Total size of a blob of memory required to satisfy all the allocations. + */ +class C10_API AllocationPlan { + private: + // Records size of each allocation by their sequential allocation ids. + std::vector allocation_sizes; + // This maps one allocation id (X) to another allocation id (Y). + // Allocation X is alive until allocation Y. From allocation Y onwards + // allocation X is not referenced. + // Thus Y is the id of the first allocation after X is freed. + // NB: When an allocation is recorded, along with recording its size, + // we also set the lifetime to be numeric_limits::max() + // This is to track allocations that are made during the scope of + // profiling but were not freed until after the scope ended. + // Such allocations are not managed by profiling allocator. + std::vector allocation_lifetimes; + // Maps an allocation to some offset in a blob of memory. + std::vector allocation_offsets; + uint64_t total_size{0}; + void clear(); + friend class AllocationPlanner; + friend class CPUProfilingAllocator; +}; + +/* + * Map of memory ptr to allocation id. This is auxiliary information only + * used to establish lifetime of allocations. + */ +class C10_API AllocationPlanner { + private: + AllocationPlan* allocation_plan_{nullptr}; + // Maps allocated ptr to its allocation id. 
+ // This is used when freeing the memory to look up the allocation id + // in order to establish the lifetime of a particular allocation. + ska::flat_hash_map allocation_ptr_to_id_; + uint64_t allocation_id_{0}; + bool validation_mode_{false}; + + bool validate_allocation(const uint64_t size, const void* ptr); + bool validate_free(const void* ptr); + + public: + bool validation_success{true}; + + AllocationPlanner() = delete; + AllocationPlanner(AllocationPlan* plan, bool validate = false) + : allocation_plan_(plan), validation_mode_(validate) {} + void record_allocation(const uint64_t size, const void* ptr); + void record_free(const void* ptr); + void formulate_plan(); + void clear(); +}; + +// NOT THREAD SAFE profiling allocator. +class C10_API CPUProfilingAllocator { + private: + const AllocationPlan* plan_{nullptr}; + uint64_t allocation_id_{0}; + uint64_t current_size_{0}; + void* blob_{nullptr}; + ska::flat_hash_map allocation_ptr_to_id_; + + public: + ~CPUProfilingAllocator(); + void set_plan(const AllocationPlan* plan); + void unset_plan(); + void* allocate(const size_t bytes); + void free(void* const ptr); +}; + +/* + * Usage: Profile allocations made by one run of the model. + * AllocationPlan plan; + * { + * WithProfileAllocationGuard profile_guard(&plan); + * module.forward(...); + * } + * plan now contains allocation plan. + */ +class C10_API WithProfileAllocationsGuard { + public: + WithProfileAllocationsGuard(AllocationPlan* plan); + ~WithProfileAllocationsGuard(); + + private: + std::unique_ptr planner_; +}; + +/* + * Usage: Validate allocation plan made with WithProfileAllocationGuard + * bool plan_validation_success, success = true; + * for (some number of representative inputs) + * { + * WithValidateAllocationPlanGuard(&plan, &plan_validation_success); + * module.forward(...); + * success = success && plan_validation_success; + * } + * success == true means allocations are according to plan + * else for some inputs allocation pattern changed. + */ +class C10_API WithValidateAllocationPlanGuard { + public: + WithValidateAllocationPlanGuard(AllocationPlan* plan, bool* success); + ~WithValidateAllocationPlanGuard(); + + private: + std::unique_ptr planner_; + bool* success_; +}; + +AllocationPlanner* GetThreadLocalAllocationPlanner(); + +/* + * Usage: Allocate tensors accordingly to allocation plan + * First make allocation plan. + * See WithProfileAllocationsGuard usage. + * Second validate allocation plan. + * See WithValidateAllocationPlanGuard usage. 
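+ * Third, run inference with the plan. Putting all three phases together
+ * (a sketch: `module`, its inputs, and `representative_inputs` are
+ * illustrative placeholders, not a real API):
+ *   AllocationPlan plan;
+ *   {
+ *     WithProfileAllocationsGuard profile_guard(&plan);
+ *     module.forward(...);
+ *   }
+ *   bool validation_success = true, all_valid = true;
+ *   for (const auto& inputs : representative_inputs) {
+ *     WithValidateAllocationPlanGuard validate_guard(&plan, &validation_success);
+ *     module.forward(inputs);
+ *     all_valid = all_valid && validation_success;
+ *   }
+ * Only if all_valid is true should the plan be used for execution, as below: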
+ * CPUProfilingAllocator profiling_allocator; + * { + * WithProfilingAllocatorGuard allocator_guard(&profiling_allocator, &plan); + * module.forward(...); + * } + */ +class C10_API WithProfilingAllocatorGuard { + public: + WithProfilingAllocatorGuard( + CPUProfilingAllocator* allocator, + const AllocationPlan* plan); + ~WithProfilingAllocatorGuard(); +}; + +CPUProfilingAllocator* GetThreadLocalProfilingAllocator(); + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/test/util/Macros.h b/phivenv/Lib/site-packages/torch/include/c10/test/util/Macros.h new file mode 100644 index 0000000000000000000000000000000000000000..af0d5c931537cc07d81790a2d28b8cb45fe701f4 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/test/util/Macros.h @@ -0,0 +1,9 @@ +#ifndef C10_TEST_CORE_MACROS_MACROS_H_ + +#ifdef _WIN32 +#define DISABLED_ON_WINDOWS(x) DISABLED_##x +#else +#define DISABLED_ON_WINDOWS(x) x +#endif + +#endif // C10_MACROS_MACROS_H_ diff --git a/phivenv/Lib/site-packages/torch/include/c10/test/util/complex_math_test_common.h b/phivenv/Lib/site-packages/torch/include/c10/test/util/complex_math_test_common.h new file mode 100644 index 0000000000000000000000000000000000000000..4e7a0b82155cdf4466f4f0a5f8aabb25446650af --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/test/util/complex_math_test_common.h @@ -0,0 +1,667 @@ +// Warning: this file is included twice in +// aten/src/ATen/test/cuda_complex_math_test.cu + +#include +#include + +#ifndef PI +#define PI 3.141592653589793238463 +#endif + +#ifndef tol +#define tol 1e-6 +#endif + +// Exponential functions + +C10_DEFINE_TEST(TestExponential, IPi) { + // exp(i*pi) = -1 + { + c10::complex e_i_pi = std::exp(c10::complex(0, float(PI))); + C10_ASSERT_NEAR(e_i_pi.real(), -1, tol); + C10_ASSERT_NEAR(e_i_pi.imag(), 0, tol); + } + { + c10::complex e_i_pi = ::exp(c10::complex(0, float(PI))); + C10_ASSERT_NEAR(e_i_pi.real(), -1, tol); + C10_ASSERT_NEAR(e_i_pi.imag(), 0, tol); + } + { + c10::complex e_i_pi = std::exp(c10::complex(0, PI)); + C10_ASSERT_NEAR(e_i_pi.real(), -1, tol); + C10_ASSERT_NEAR(e_i_pi.imag(), 0, tol); + } + { + c10::complex e_i_pi = ::exp(c10::complex(0, PI)); + C10_ASSERT_NEAR(e_i_pi.real(), -1, tol); + C10_ASSERT_NEAR(e_i_pi.imag(), 0, tol); + } +} + +C10_DEFINE_TEST(TestExponential, EulerFormula) { + // exp(ix) = cos(x) + i * sin(x) + { + c10::complex x(0.1, 1.2); + c10::complex e = std::exp(x); + float expected_real = std::exp(x.real()) * std::cos(x.imag()); + float expected_imag = std::exp(x.real()) * std::sin(x.imag()); + C10_ASSERT_NEAR(e.real(), expected_real, tol); + C10_ASSERT_NEAR(e.imag(), expected_imag, tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex e = ::exp(x); + float expected_real = ::exp(x.real()) * ::cos(x.imag()); + float expected_imag = ::exp(x.real()) * ::sin(x.imag()); + C10_ASSERT_NEAR(e.real(), expected_real, tol); + C10_ASSERT_NEAR(e.imag(), expected_imag, tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex e = std::exp(x); + float expected_real = std::exp(x.real()) * std::cos(x.imag()); + float expected_imag = std::exp(x.real()) * std::sin(x.imag()); + C10_ASSERT_NEAR(e.real(), expected_real, tol); + C10_ASSERT_NEAR(e.imag(), expected_imag, tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex e = ::exp(x); + float expected_real = ::exp(x.real()) * ::cos(x.imag()); + float expected_imag = ::exp(x.real()) * ::sin(x.imag()); + C10_ASSERT_NEAR(e.real(), expected_real, tol); + C10_ASSERT_NEAR(e.imag(), expected_imag, tol); + } +} + 
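+// The identity exercised above can also be checked with plain std::complex,
+// outside the C10 test harness (an illustrative snippet, not part of this
+// suite):
+//
+//   #include <cassert>
+//   #include <cmath>
+//   #include <complex>
+//
+//   void euler_demo() {
+//     const double pi = std::acos(-1.0);
+//     const std::complex<double> e_i_pi =
+//         std::exp(std::complex<double>(0.0, pi));
+//     // exp(i*pi) = cos(pi) + i*sin(pi) = -1 + 0i, up to rounding error.
+//     assert(std::abs(e_i_pi.real() + 1.0) < 1e-12);
+//     assert(std::abs(e_i_pi.imag()) < 1e-12);
+//   }
+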
+C10_DEFINE_TEST(TestExpm1, Normal) { + // expm1(x) = exp(x) - 1 + { + c10::complex x(0.1, 1.2); + c10::complex l1 = std::expm1(x); + c10::complex l2 = std::exp(x) - 1.0f; + C10_ASSERT_NEAR(l1.real(), l2.real(), tol); + C10_ASSERT_NEAR(l1.imag(), l2.imag(), tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex l1 = std::expm1(x); + c10::complex l2 = std::exp(x) - 1.0; + C10_ASSERT_NEAR(l1.real(), l2.real(), tol); + C10_ASSERT_NEAR(l1.imag(), l2.imag(), tol); + } +} + +C10_DEFINE_TEST(TestExpm1, Small) { + // expm1(x) = exp(x) - 1 + // expm1(x) provides greater precision than exp(x) - 1 for small values of x + { + c10::complex x(1e-30, 1e-30); + c10::complex l1 = std::expm1(x); + C10_ASSERT_NEAR(l1.real(), 1e-30, tol); + C10_ASSERT_NEAR(l1.imag(), 1e-30, tol); + } + { + c10::complex x(1e-100, 1e-100); + c10::complex l1 = std::expm1(x); + C10_ASSERT_NEAR(l1.real(), 1e-30, tol); + C10_ASSERT_NEAR(l1.imag(), 1e-30, tol); + } +} + +C10_DEFINE_TEST(TestLog, Definition) { + // log(x) = log(r) + i*theta + { + c10::complex x(1.2, 3.4); + c10::complex l = std::log(x); + float expected_real = std::log(std::abs(x)); + float expected_imag = std::arg(x); + C10_ASSERT_NEAR(l.real(), expected_real, tol); + C10_ASSERT_NEAR(l.imag(), expected_imag, tol); + } + { + c10::complex x(1.2, 3.4); + c10::complex l = ::log(x); + float expected_real = ::log(std::abs(x)); + float expected_imag = std::arg(x); + C10_ASSERT_NEAR(l.real(), expected_real, tol); + C10_ASSERT_NEAR(l.imag(), expected_imag, tol); + } + { + c10::complex x(1.2, 3.4); + c10::complex l = std::log(x); + float expected_real = std::log(std::abs(x)); + float expected_imag = std::arg(x); + C10_ASSERT_NEAR(l.real(), expected_real, tol); + C10_ASSERT_NEAR(l.imag(), expected_imag, tol); + } + { + c10::complex x(1.2, 3.4); + c10::complex l = ::log(x); + float expected_real = ::log(std::abs(x)); + float expected_imag = std::arg(x); + C10_ASSERT_NEAR(l.real(), expected_real, tol); + C10_ASSERT_NEAR(l.imag(), expected_imag, tol); + } +} + +C10_DEFINE_TEST(TestLog10, Rev) { + // log10(10^x) = x + { + c10::complex x(0.1, 1.2); + c10::complex l = std::log10(std::pow(float(10), x)); + C10_ASSERT_NEAR(l.real(), float(0.1), tol); + C10_ASSERT_NEAR(l.imag(), float(1.2), tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex l = ::log10(::pow(float(10), x)); + C10_ASSERT_NEAR(l.real(), float(0.1), tol); + C10_ASSERT_NEAR(l.imag(), float(1.2), tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex l = std::log10(std::pow(double(10), x)); + C10_ASSERT_NEAR(l.real(), double(0.1), tol); + C10_ASSERT_NEAR(l.imag(), double(1.2), tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex l = ::log10(::pow(double(10), x)); + C10_ASSERT_NEAR(l.real(), double(0.1), tol); + C10_ASSERT_NEAR(l.imag(), double(1.2), tol); + } +} + +C10_DEFINE_TEST(TestLog2, Rev) { + // log2(2^x) = x + { + c10::complex x(0.1, 1.2); + c10::complex l = std::log2(std::pow(float(2), x)); + C10_ASSERT_NEAR(l.real(), float(0.1), tol); + C10_ASSERT_NEAR(l.imag(), float(1.2), tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex l = ::log2(std::pow(float(2), x)); + C10_ASSERT_NEAR(l.real(), float(0.1), tol); + C10_ASSERT_NEAR(l.imag(), float(1.2), tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex l = std::log2(std::pow(double(2), x)); + C10_ASSERT_NEAR(l.real(), double(0.1), tol); + C10_ASSERT_NEAR(l.imag(), double(1.2), tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex l = ::log2(std::pow(double(2), x)); + C10_ASSERT_NEAR(l.real(), double(0.1), tol); + C10_ASSERT_NEAR(l.imag(), 
double(1.2), tol); + } +} + +C10_DEFINE_TEST(TestLog1p, Normal) { + // log1p(x) = log(1 + x) + { + c10::complex x(0.1, 1.2); + c10::complex l1 = std::log1p(x); + c10::complex l2 = std::log(1.0f + x); + C10_ASSERT_NEAR(l1.real(), l2.real(), tol); + C10_ASSERT_NEAR(l1.imag(), l2.imag(), tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex l1 = std::log1p(x); + c10::complex l2 = std::log(1.0 + x); + C10_ASSERT_NEAR(l1.real(), l2.real(), tol); + C10_ASSERT_NEAR(l1.imag(), l2.imag(), tol); + } +} + +C10_DEFINE_TEST(TestLog1p, Small) { + // log(1 + x) ~ x for |x| << 1 + { + c10::complex x(1e-9, 2e-9); + c10::complex l = std::log1p(x); + C10_ASSERT_NEAR(l.real() / x.real(), 1, tol); + C10_ASSERT_NEAR(l.imag() / x.imag(), 1, tol); + } + { + c10::complex x(1e-100, 2e-100); + c10::complex l = std::log1p(x); + C10_ASSERT_NEAR(l.real() / x.real(), 1, tol); + C10_ASSERT_NEAR(l.imag() / x.imag(), 1, tol); + } +} + +C10_DEFINE_TEST(TestLog1p, Extreme) { + // log(1 + x) ~ x for |x| << 1 and in the brink of overflow / underflow + { + c10::complex x(-1, 1e-30); + c10::complex l = std::log1p(x); + C10_ASSERT_NEAR(l.real(), -69.07755278982137, tol); + C10_ASSERT_NEAR(l.imag(), 1.5707963267948966, tol); + } + { + c10::complex x(-1, 1e30); + c10::complex l = std::log1p(x); + C10_ASSERT_NEAR(l.real(), 69.07755278982137, tol); + C10_ASSERT_NEAR(l.imag(), 1.5707963267948966, tol); + } + { + c10::complex x(1e30, 1); + c10::complex l = std::log1p(x); + C10_ASSERT_NEAR(l.real(), 69.07755278982137, tol); + C10_ASSERT_NEAR(l.imag(), 1e-30, tol); + } + { + c10::complex x(1e-30, 1); + c10::complex l = std::log1p(x); + C10_ASSERT_NEAR(l.real(), 0.34657359027997264, tol); + C10_ASSERT_NEAR(l.imag(), 0.7853981633974483, tol); + } + { + c10::complex x(1e30, 1e30); + c10::complex l = std::log1p(x); + C10_ASSERT_NEAR(l.real(), 69.42412638010134, tol); + C10_ASSERT_NEAR(l.imag(), 0.7853981633974483, tol); + } + { + c10::complex x(1e-38, 1e-38); + c10::complex l = std::log1p(x); + C10_ASSERT_NEAR(l.real(), 1e-38, tol); + C10_ASSERT_NEAR(l.imag(), 1e-38, tol); + } + { + c10::complex x(1e-38, 2e-30); + c10::complex l = std::log1p(x); + C10_ASSERT_NEAR(l.real(), 1e-30, tol); + C10_ASSERT_NEAR(l.imag(), 2e-30, tol); + } + { + c10::complex x(-1, 1e-250); + c10::complex l = std::log1p(x); + C10_ASSERT_NEAR(l.real(), -575.6462732485114, tol); + C10_ASSERT_NEAR(l.imag(), 1.5707963267948966, tol); + } + { + c10::complex x(-1, 1e250); + c10::complex l = std::log1p(x); + C10_ASSERT_NEAR(l.real(), 575.6462732485114, tol); + C10_ASSERT_NEAR(l.imag(), 1.5707963267948966, tol); + } + { + c10::complex x(1e250, 1); + c10::complex l = std::log1p(x); + C10_ASSERT_NEAR(l.real(), 575.6462732485114, tol); + C10_ASSERT_NEAR(l.imag(), 1e-250, tol); + } + { + c10::complex x(1e-250, 1); + c10::complex l = std::log1p(x); + C10_ASSERT_NEAR(l.real(), 0.34657359027997264, tol); + C10_ASSERT_NEAR(l.imag(), 0.7853981633974483, tol); + } + { + c10::complex x(1e250, 1e250); + c10::complex l = std::log1p(x); + C10_ASSERT_NEAR(l.real(), 575.9928468387914, tol); + C10_ASSERT_NEAR(l.imag(), 0.7853981633974483, tol); + } + { + c10::complex x(1e-250, 1e-250); + c10::complex l = std::log1p(x); + C10_ASSERT_NEAR(l.real(), 1e-250, tol); + C10_ASSERT_NEAR(l.imag(), 1e-250, tol); + } + { + c10::complex x(1e-250, 2e-250); + c10::complex l = std::log1p(x); + C10_ASSERT_NEAR(l.real(), 1e-250, tol); + C10_ASSERT_NEAR(l.imag(), 2e-250, tol); + } + { + c10::complex x(2e-308, 1.5e-250); + c10::complex l = std::log1p(x); + C10_ASSERT_NEAR(l.real(), 2e-308, tol); + 
C10_ASSERT_NEAR(l.imag(), 1.5e-308, tol); + } +} + +// Power functions + +C10_DEFINE_TEST(TestPowSqrt, Equal) { + // x^0.5 = sqrt(x) + { + c10::complex x(0.1, 1.2); + c10::complex y = std::pow(x, float(0.5)); + c10::complex z = std::sqrt(x); + C10_ASSERT_NEAR(y.real(), z.real(), tol); + C10_ASSERT_NEAR(y.imag(), z.imag(), tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex y = ::pow(x, float(0.5)); + c10::complex z = ::sqrt(x); + C10_ASSERT_NEAR(y.real(), z.real(), tol); + C10_ASSERT_NEAR(y.imag(), z.imag(), tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex y = std::pow(x, double(0.5)); + c10::complex z = std::sqrt(x); + C10_ASSERT_NEAR(y.real(), z.real(), tol); + C10_ASSERT_NEAR(y.imag(), z.imag(), tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex y = ::pow(x, double(0.5)); + c10::complex z = ::sqrt(x); + C10_ASSERT_NEAR(y.real(), z.real(), tol); + C10_ASSERT_NEAR(y.imag(), z.imag(), tol); + } +} + +C10_DEFINE_TEST(TestPow, Square) { + // x^2 = x * x + { + c10::complex x(0.1, 1.2); + c10::complex y = std::pow(x, float(2)); + c10::complex z = x * x; + C10_ASSERT_NEAR(y.real(), z.real(), tol); + C10_ASSERT_NEAR(y.imag(), z.imag(), tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex y = ::pow(x, float(2)); + c10::complex z = x * x; + C10_ASSERT_NEAR(y.real(), z.real(), tol); + C10_ASSERT_NEAR(y.imag(), z.imag(), tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex y = std::pow(x, double(2)); + c10::complex z = x * x; + C10_ASSERT_NEAR(y.real(), z.real(), tol); + C10_ASSERT_NEAR(y.imag(), z.imag(), tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex y = ::pow(x, double(2)); + c10::complex z = x * x; + C10_ASSERT_NEAR(y.real(), z.real(), tol); + C10_ASSERT_NEAR(y.imag(), z.imag(), tol); + } +} + +// Trigonometric functions and hyperbolic functions + +C10_DEFINE_TEST(TestSinCosSinhCosh, Identity) { + // sin(x + i * y) = sin(x) * cosh(y) + i * cos(x) * sinh(y) + // cos(x + i * y) = cos(x) * cosh(y) - i * sin(x) * sinh(y) + { + c10::complex x(0.1, 1.2); + c10::complex y = std::sin(x); + float expected_real = std::sin(x.real()) * std::cosh(x.imag()); + float expected_imag = std::cos(x.real()) * std::sinh(x.imag()); + C10_ASSERT_NEAR(y.real(), expected_real, tol); + C10_ASSERT_NEAR(y.imag(), expected_imag, tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex y = ::sin(x); + float expected_real = ::sin(x.real()) * ::cosh(x.imag()); + float expected_imag = ::cos(x.real()) * ::sinh(x.imag()); + C10_ASSERT_NEAR(y.real(), expected_real, tol); + C10_ASSERT_NEAR(y.imag(), expected_imag, tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex y = std::cos(x); + float expected_real = std::cos(x.real()) * std::cosh(x.imag()); + float expected_imag = -std::sin(x.real()) * std::sinh(x.imag()); + C10_ASSERT_NEAR(y.real(), expected_real, tol); + C10_ASSERT_NEAR(y.imag(), expected_imag, tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex y = ::cos(x); + float expected_real = ::cos(x.real()) * ::cosh(x.imag()); + float expected_imag = -::sin(x.real()) * ::sinh(x.imag()); + C10_ASSERT_NEAR(y.real(), expected_real, tol); + C10_ASSERT_NEAR(y.imag(), expected_imag, tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex y = std::sin(x); + float expected_real = std::sin(x.real()) * std::cosh(x.imag()); + float expected_imag = std::cos(x.real()) * std::sinh(x.imag()); + C10_ASSERT_NEAR(y.real(), expected_real, tol); + C10_ASSERT_NEAR(y.imag(), expected_imag, tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex y = ::sin(x); + float expected_real = ::sin(x.real()) * 
::cosh(x.imag()); + float expected_imag = ::cos(x.real()) * ::sinh(x.imag()); + C10_ASSERT_NEAR(y.real(), expected_real, tol); + C10_ASSERT_NEAR(y.imag(), expected_imag, tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex y = std::cos(x); + float expected_real = std::cos(x.real()) * std::cosh(x.imag()); + float expected_imag = -std::sin(x.real()) * std::sinh(x.imag()); + C10_ASSERT_NEAR(y.real(), expected_real, tol); + C10_ASSERT_NEAR(y.imag(), expected_imag, tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex y = ::cos(x); + float expected_real = ::cos(x.real()) * ::cosh(x.imag()); + float expected_imag = -::sin(x.real()) * ::sinh(x.imag()); + C10_ASSERT_NEAR(y.real(), expected_real, tol); + C10_ASSERT_NEAR(y.imag(), expected_imag, tol); + } +} + +C10_DEFINE_TEST(TestTan, Identity) { + // tan(x) = sin(x) / cos(x) + { + c10::complex x(0.1, 1.2); + c10::complex y = std::tan(x); + c10::complex z = std::sin(x) / std::cos(x); + C10_ASSERT_NEAR(y.real(), z.real(), tol); + C10_ASSERT_NEAR(y.imag(), z.imag(), tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex y = ::tan(x); + c10::complex z = ::sin(x) / ::cos(x); + C10_ASSERT_NEAR(y.real(), z.real(), tol); + C10_ASSERT_NEAR(y.imag(), z.imag(), tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex y = std::tan(x); + c10::complex z = std::sin(x) / std::cos(x); + C10_ASSERT_NEAR(y.real(), z.real(), tol); + C10_ASSERT_NEAR(y.imag(), z.imag(), tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex y = ::tan(x); + c10::complex z = ::sin(x) / ::cos(x); + C10_ASSERT_NEAR(y.real(), z.real(), tol); + C10_ASSERT_NEAR(y.imag(), z.imag(), tol); + } +} + +C10_DEFINE_TEST(TestTanh, Identity) { + // tanh(x) = sinh(x) / cosh(x) + { + c10::complex x(0.1, 1.2); + c10::complex y = std::tanh(x); + c10::complex z = std::sinh(x) / std::cosh(x); + C10_ASSERT_NEAR(y.real(), z.real(), tol); + C10_ASSERT_NEAR(y.imag(), z.imag(), tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex y = ::tanh(x); + c10::complex z = ::sinh(x) / ::cosh(x); + C10_ASSERT_NEAR(y.real(), z.real(), tol); + C10_ASSERT_NEAR(y.imag(), z.imag(), tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex y = std::tanh(x); + c10::complex z = std::sinh(x) / std::cosh(x); + C10_ASSERT_NEAR(y.real(), z.real(), tol); + C10_ASSERT_NEAR(y.imag(), z.imag(), tol); + } + { + c10::complex x(0.1, 1.2); + c10::complex y = ::tanh(x); + c10::complex z = ::sinh(x) / ::cosh(x); + C10_ASSERT_NEAR(y.real(), z.real(), tol); + C10_ASSERT_NEAR(y.imag(), z.imag(), tol); + } +} + +// Rev trigonometric functions + +C10_DEFINE_TEST(TestRevTrigonometric, Rev) { + // asin(sin(x)) = x + // acos(cos(x)) = x + // atan(tan(x)) = x + { + c10::complex x(0.5, 0.6); + c10::complex s = std::sin(x); + c10::complex ss = std::asin(s); + c10::complex c = std::cos(x); + c10::complex cc = std::acos(c); + c10::complex t = std::tan(x); + c10::complex tt = std::atan(t); + C10_ASSERT_NEAR(x.real(), ss.real(), tol); + C10_ASSERT_NEAR(x.imag(), ss.imag(), tol); + C10_ASSERT_NEAR(x.real(), cc.real(), tol); + C10_ASSERT_NEAR(x.imag(), cc.imag(), tol); + C10_ASSERT_NEAR(x.real(), tt.real(), tol); + C10_ASSERT_NEAR(x.imag(), tt.imag(), tol); + } + { + c10::complex x(0.5, 0.6); + c10::complex s = ::sin(x); + c10::complex ss = ::asin(s); + c10::complex c = ::cos(x); + c10::complex cc = ::acos(c); + c10::complex t = ::tan(x); + c10::complex tt = ::atan(t); + C10_ASSERT_NEAR(x.real(), ss.real(), tol); + C10_ASSERT_NEAR(x.imag(), ss.imag(), tol); + C10_ASSERT_NEAR(x.real(), cc.real(), tol); + C10_ASSERT_NEAR(x.imag(), cc.imag(), tol); + 
C10_ASSERT_NEAR(x.real(), tt.real(), tol); + C10_ASSERT_NEAR(x.imag(), tt.imag(), tol); + } + { + c10::complex x(0.5, 0.6); + c10::complex s = std::sin(x); + c10::complex ss = std::asin(s); + c10::complex c = std::cos(x); + c10::complex cc = std::acos(c); + c10::complex t = std::tan(x); + c10::complex tt = std::atan(t); + C10_ASSERT_NEAR(x.real(), ss.real(), tol); + C10_ASSERT_NEAR(x.imag(), ss.imag(), tol); + C10_ASSERT_NEAR(x.real(), cc.real(), tol); + C10_ASSERT_NEAR(x.imag(), cc.imag(), tol); + C10_ASSERT_NEAR(x.real(), tt.real(), tol); + C10_ASSERT_NEAR(x.imag(), tt.imag(), tol); + } + { + c10::complex x(0.5, 0.6); + c10::complex s = ::sin(x); + c10::complex ss = ::asin(s); + c10::complex c = ::cos(x); + c10::complex cc = ::acos(c); + c10::complex t = ::tan(x); + c10::complex tt = ::atan(t); + C10_ASSERT_NEAR(x.real(), ss.real(), tol); + C10_ASSERT_NEAR(x.imag(), ss.imag(), tol); + C10_ASSERT_NEAR(x.real(), cc.real(), tol); + C10_ASSERT_NEAR(x.imag(), cc.imag(), tol); + C10_ASSERT_NEAR(x.real(), tt.real(), tol); + C10_ASSERT_NEAR(x.imag(), tt.imag(), tol); + } +} + +// Rev hyperbolic functions + +C10_DEFINE_TEST(TestRevHyperbolic, Rev) { + // asinh(sinh(x)) = x + // acosh(cosh(x)) = x + // atanh(tanh(x)) = x + { + c10::complex x(0.5, 0.6); + c10::complex s = std::sinh(x); + c10::complex ss = std::asinh(s); + c10::complex c = std::cosh(x); + c10::complex cc = std::acosh(c); + c10::complex t = std::tanh(x); + c10::complex tt = std::atanh(t); + C10_ASSERT_NEAR(x.real(), ss.real(), tol); + C10_ASSERT_NEAR(x.imag(), ss.imag(), tol); + C10_ASSERT_NEAR(x.real(), cc.real(), tol); + C10_ASSERT_NEAR(x.imag(), cc.imag(), tol); + C10_ASSERT_NEAR(x.real(), tt.real(), tol); + C10_ASSERT_NEAR(x.imag(), tt.imag(), tol); + } + { + c10::complex x(0.5, 0.6); + c10::complex s = ::sinh(x); + c10::complex ss = ::asinh(s); + c10::complex c = ::cosh(x); + c10::complex cc = ::acosh(c); + c10::complex t = ::tanh(x); + c10::complex tt = ::atanh(t); + C10_ASSERT_NEAR(x.real(), ss.real(), tol); + C10_ASSERT_NEAR(x.imag(), ss.imag(), tol); + C10_ASSERT_NEAR(x.real(), cc.real(), tol); + C10_ASSERT_NEAR(x.imag(), cc.imag(), tol); + C10_ASSERT_NEAR(x.real(), tt.real(), tol); + C10_ASSERT_NEAR(x.imag(), tt.imag(), tol); + } + { + c10::complex x(0.5, 0.6); + c10::complex s = std::sinh(x); + c10::complex ss = std::asinh(s); + c10::complex c = std::cosh(x); + c10::complex cc = std::acosh(c); + c10::complex t = std::tanh(x); + c10::complex tt = std::atanh(t); + C10_ASSERT_NEAR(x.real(), ss.real(), tol); + C10_ASSERT_NEAR(x.imag(), ss.imag(), tol); + C10_ASSERT_NEAR(x.real(), cc.real(), tol); + C10_ASSERT_NEAR(x.imag(), cc.imag(), tol); + C10_ASSERT_NEAR(x.real(), tt.real(), tol); + C10_ASSERT_NEAR(x.imag(), tt.imag(), tol); + } + { + c10::complex x(0.5, 0.6); + c10::complex s = ::sinh(x); + c10::complex ss = ::asinh(s); + c10::complex c = ::cosh(x); + c10::complex cc = ::acosh(c); + c10::complex t = ::tanh(x); + c10::complex tt = ::atanh(t); + C10_ASSERT_NEAR(x.real(), ss.real(), tol); + C10_ASSERT_NEAR(x.imag(), ss.imag(), tol); + C10_ASSERT_NEAR(x.real(), cc.real(), tol); + C10_ASSERT_NEAR(x.imag(), cc.imag(), tol); + C10_ASSERT_NEAR(x.real(), tt.real(), tol); + C10_ASSERT_NEAR(x.imag(), tt.imag(), tol); + } +} diff --git a/phivenv/Lib/site-packages/torch/include/c10/test/util/complex_test_common.h b/phivenv/Lib/site-packages/torch/include/c10/test/util/complex_test_common.h new file mode 100644 index 0000000000000000000000000000000000000000..032b7380507bde2686c2aea64a5ed5816f381eb4 --- /dev/null +++ 
b/phivenv/Lib/site-packages/torch/include/c10/test/util/complex_test_common.h @@ -0,0 +1,658 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#if (defined(__CUDACC__) || defined(__HIPCC__)) +#define MAYBE_GLOBAL __global__ +#else +#define MAYBE_GLOBAL +#endif + +#define PI 3.141592653589793238463 + +namespace memory { + +MAYBE_GLOBAL void test_size() { + static_assert(sizeof(c10::complex) == 2 * sizeof(float), ""); + static_assert(sizeof(c10::complex) == 2 * sizeof(double), ""); +} + +MAYBE_GLOBAL void test_align() { + static_assert(alignof(c10::complex) == 2 * sizeof(float), ""); + static_assert(alignof(c10::complex) == 2 * sizeof(double), ""); +} + +MAYBE_GLOBAL void test_pod() { + static_assert(std::is_standard_layout>::value, ""); + static_assert(std::is_standard_layout>::value, ""); +} + +TEST(TestMemory, ReinterpretCast) { + { + std::complex z(1, 2); + c10::complex zz = *reinterpret_cast*>(&z); + ASSERT_EQ(zz.real(), float(1)); + ASSERT_EQ(zz.imag(), float(2)); + } + + { + c10::complex z(3, 4); + std::complex zz = *reinterpret_cast*>(&z); + ASSERT_EQ(zz.real(), float(3)); + ASSERT_EQ(zz.imag(), float(4)); + } + + { + std::complex z(1, 2); + c10::complex zz = *reinterpret_cast*>(&z); + ASSERT_EQ(zz.real(), double(1)); + ASSERT_EQ(zz.imag(), double(2)); + } + + { + c10::complex z(3, 4); + std::complex zz = *reinterpret_cast*>(&z); + ASSERT_EQ(zz.real(), double(3)); + ASSERT_EQ(zz.imag(), double(4)); + } +} + +#if defined(__CUDACC__) || defined(__HIPCC__) +TEST(TestMemory, ThrustReinterpretCast) { + { + thrust::complex z(1, 2); + c10::complex zz = *reinterpret_cast*>(&z); + ASSERT_EQ(zz.real(), float(1)); + ASSERT_EQ(zz.imag(), float(2)); + } + + { + c10::complex z(3, 4); + thrust::complex zz = *reinterpret_cast*>(&z); + ASSERT_EQ(zz.real(), float(3)); + ASSERT_EQ(zz.imag(), float(4)); + } + + { + thrust::complex z(1, 2); + c10::complex zz = *reinterpret_cast*>(&z); + ASSERT_EQ(zz.real(), double(1)); + ASSERT_EQ(zz.imag(), double(2)); + } + + { + c10::complex z(3, 4); + thrust::complex zz = + *reinterpret_cast*>(&z); + ASSERT_EQ(zz.real(), double(3)); + ASSERT_EQ(zz.imag(), double(4)); + } +} +#endif + +} // namespace memory + +namespace constructors { + +template +C10_HOST_DEVICE void test_construct_from_scalar() { + constexpr scalar_t num1 = scalar_t(1.23); + constexpr scalar_t num2 = scalar_t(4.56); + constexpr scalar_t zero = scalar_t(); + static_assert(c10::complex(num1, num2).real() == num1, ""); + static_assert(c10::complex(num1, num2).imag() == num2, ""); + static_assert(c10::complex(num1).real() == num1, ""); + static_assert(c10::complex(num1).imag() == zero, ""); + static_assert(c10::complex().real() == zero, ""); + static_assert(c10::complex().imag() == zero, ""); +} + +template +C10_HOST_DEVICE void test_construct_from_other() { + constexpr other_t num1 = other_t(1.23); + constexpr other_t num2 = other_t(4.56); + constexpr scalar_t num3 = scalar_t(num1); + constexpr scalar_t num4 = scalar_t(num2); + static_assert( + c10::complex(c10::complex(num1, num2)).real() == num3, + ""); + static_assert( + c10::complex(c10::complex(num1, num2)).imag() == num4, + ""); +} + +MAYBE_GLOBAL void test_convert_constructors() { + test_construct_from_scalar(); + test_construct_from_scalar(); + + static_assert( + std::is_convertible, c10::complex>::value, ""); + static_assert( + !std::is_convertible, c10::complex>::value, + ""); + static_assert( + std::is_convertible, c10::complex>::value, + ""); + static_assert( + std::is_convertible, c10::complex>::value, + 
""); + + static_assert( + std::is_constructible, c10::complex>::value, + ""); + static_assert( + std::is_constructible, c10::complex>::value, + ""); + static_assert( + std::is_constructible, c10::complex>::value, + ""); + static_assert( + std::is_constructible, c10::complex>::value, + ""); + + test_construct_from_other(); + test_construct_from_other(); + test_construct_from_other(); + test_construct_from_other(); +} + +template +C10_HOST_DEVICE void test_construct_from_std() { + constexpr scalar_t num1 = scalar_t(1.23); + constexpr scalar_t num2 = scalar_t(4.56); + static_assert( + c10::complex(std::complex(num1, num2)).real() == num1, + ""); + static_assert( + c10::complex(std::complex(num1, num2)).imag() == num2, + ""); +} + +MAYBE_GLOBAL void test_std_conversion() { + test_construct_from_std(); + test_construct_from_std(); +} + +#if defined(__CUDACC__) || defined(__HIPCC__) +template +void test_construct_from_thrust() { + constexpr scalar_t num1 = scalar_t(1.23); + constexpr scalar_t num2 = scalar_t(4.56); + ASSERT_EQ( + c10::complex(thrust::complex(num1, num2)).real(), + num1); + ASSERT_EQ( + c10::complex(thrust::complex(num1, num2)).imag(), + num2); +} + +TEST(TestConstructors, FromThrust) { + test_construct_from_thrust(); + test_construct_from_thrust(); +} +#endif + +TEST(TestConstructors, UnorderedMap) { + std::unordered_map< + c10::complex, + c10::complex, + c10::hash>> + m; + auto key1 = c10::complex(2.5, 3); + auto key2 = c10::complex(2, 0); + auto val1 = c10::complex(2, -3.2); + auto val2 = c10::complex(0, -3); + m[key1] = val1; + m[key2] = val2; + ASSERT_EQ(m[key1], val1); + ASSERT_EQ(m[key2], val2); +} + +} // namespace constructors + +namespace assignment { + +template +constexpr c10::complex one() { + c10::complex result(3, 4); + result = scalar_t(1); + return result; +} + +MAYBE_GLOBAL void test_assign_real() { + static_assert(one().real() == float(1), ""); + static_assert(one().imag() == float(), ""); + static_assert(one().real() == double(1), ""); + static_assert(one().imag() == double(), ""); +} + +constexpr std::tuple, c10::complex> one_two() { + constexpr c10::complex src(1, 2); + c10::complex ret0; + c10::complex ret1; + ret0 = ret1 = src; + return std::make_tuple(ret0, ret1); +} + +MAYBE_GLOBAL void test_assign_other() { + constexpr auto tup = one_two(); + static_assert(std::get>(tup).real() == double(1), ""); + static_assert(std::get>(tup).imag() == double(2), ""); + static_assert(std::get>(tup).real() == float(1), ""); + static_assert(std::get>(tup).imag() == float(2), ""); +} + +constexpr std::tuple, c10::complex> one_two_std() { + constexpr std::complex src(1, 1); + c10::complex ret0; + c10::complex ret1; + ret0 = ret1 = src; + return std::make_tuple(ret0, ret1); +} + +MAYBE_GLOBAL void test_assign_std() { + constexpr auto tup = one_two(); + static_assert(std::get>(tup).real() == double(1), ""); + static_assert(std::get>(tup).imag() == double(2), ""); + static_assert(std::get>(tup).real() == float(1), ""); + static_assert(std::get>(tup).imag() == float(2), ""); +} + +#if defined(__CUDACC__) || defined(__HIPCC__) +C10_HOST_DEVICE std::tuple, c10::complex> +one_two_thrust() { + thrust::complex src(1, 2); + c10::complex ret0; + c10::complex ret1; + ret0 = ret1 = src; + return std::make_tuple(ret0, ret1); +} + +TEST(TestAssignment, FromThrust) { + auto tup = one_two_thrust(); + ASSERT_EQ(std::get>(tup).real(), double(1)); + ASSERT_EQ(std::get>(tup).imag(), double(2)); + ASSERT_EQ(std::get>(tup).real(), float(1)); + ASSERT_EQ(std::get>(tup).imag(), float(2)); +} 
+#endif + +} // namespace assignment + +namespace literals { + +MAYBE_GLOBAL void test_complex_literals() { + using namespace c10::complex_literals; + static_assert(std::is_same>::value, ""); + static_assert((0.5_if).real() == float(), ""); + static_assert((0.5_if).imag() == float(0.5), ""); + static_assert( + std::is_same>::value, ""); + static_assert((0.5_id).real() == float(), ""); + static_assert((0.5_id).imag() == float(0.5), ""); + + static_assert(std::is_same>::value, ""); + static_assert((1_if).real() == float(), ""); + static_assert((1_if).imag() == float(1), ""); + static_assert(std::is_same>::value, ""); + static_assert((1_id).real() == double(), ""); + static_assert((1_id).imag() == double(1), ""); +} + +} // namespace literals + +namespace real_imag { + +template +constexpr c10::complex zero_one() { + c10::complex result; + result.imag(scalar_t(1)); + return result; +} + +template +constexpr c10::complex one_zero() { + c10::complex result; + result.real(scalar_t(1)); + return result; +} + +MAYBE_GLOBAL void test_real_imag_modify() { + static_assert(zero_one().real() == float(0), ""); + static_assert(zero_one().imag() == float(1), ""); + static_assert(zero_one().real() == double(0), ""); + static_assert(zero_one().imag() == double(1), ""); + + static_assert(one_zero().real() == float(1), ""); + static_assert(one_zero().imag() == float(0), ""); + static_assert(one_zero().real() == double(1), ""); + static_assert(one_zero().imag() == double(0), ""); +} + +} // namespace real_imag + +namespace arithmetic_assign { + +template +constexpr c10::complex p(scalar_t value) { + c10::complex result(scalar_t(2), scalar_t(2)); + result += value; + return result; +} + +template +constexpr c10::complex m(scalar_t value) { + c10::complex result(scalar_t(2), scalar_t(2)); + result -= value; + return result; +} + +template +constexpr c10::complex t(scalar_t value) { + c10::complex result(scalar_t(2), scalar_t(2)); + result *= value; + return result; +} + +template +constexpr c10::complex d(scalar_t value) { + c10::complex result(scalar_t(2), scalar_t(2)); + result /= value; + return result; +} + +template +C10_HOST_DEVICE void test_arithmetic_assign_scalar() { + constexpr c10::complex x = p(scalar_t(1)); + static_assert(x.real() == scalar_t(3), ""); + static_assert(x.imag() == scalar_t(2), ""); + constexpr c10::complex y = m(scalar_t(1)); + static_assert(y.real() == scalar_t(1), ""); + static_assert(y.imag() == scalar_t(2), ""); + constexpr c10::complex z = t(scalar_t(2)); + static_assert(z.real() == scalar_t(4), ""); + static_assert(z.imag() == scalar_t(4), ""); + constexpr c10::complex t = d(scalar_t(2)); + static_assert(t.real() == scalar_t(1), ""); + static_assert(t.imag() == scalar_t(1), ""); +} + +template +constexpr c10::complex p( + scalar_t real, + scalar_t imag, + c10::complex rhs) { + c10::complex result(real, imag); + result += rhs; + return result; +} + +template +constexpr c10::complex m( + scalar_t real, + scalar_t imag, + c10::complex rhs) { + c10::complex result(real, imag); + result -= rhs; + return result; +} + +template +constexpr c10::complex t( + scalar_t real, + scalar_t imag, + c10::complex rhs) { + c10::complex result(real, imag); + result *= rhs; + return result; +} + +template +constexpr c10::complex d( + scalar_t real, + scalar_t imag, + c10::complex rhs) { + c10::complex result(real, imag); + result /= rhs; + return result; +} + +template +C10_HOST_DEVICE void test_arithmetic_assign_complex() { + using namespace c10::complex_literals; + constexpr c10::complex x2 = 
p(scalar_t(2), scalar_t(2), 1.0_if); + static_assert(x2.real() == scalar_t(2), ""); + static_assert(x2.imag() == scalar_t(3), ""); + constexpr c10::complex x3 = p(scalar_t(2), scalar_t(2), 1.0_id); + static_assert(x3.real() == scalar_t(2), ""); + + // this test is skipped due to a bug in constexpr evaluation + // in nvcc. This bug has already been fixed since CUDA 11.2 +#if !defined(__CUDACC__) || (defined(CUDA_VERSION) && CUDA_VERSION >= 11020) + static_assert(x3.imag() == scalar_t(3), ""); +#endif + + constexpr c10::complex y2 = m(scalar_t(2), scalar_t(2), 1.0_if); + static_assert(y2.real() == scalar_t(2), ""); + static_assert(y2.imag() == scalar_t(1), ""); + constexpr c10::complex y3 = m(scalar_t(2), scalar_t(2), 1.0_id); + static_assert(y3.real() == scalar_t(2), ""); + + // this test is skipped due to a bug in constexpr evaluation + // in nvcc. This bug has already been fixed since CUDA 11.2 +#if !defined(__CUDACC__) || (defined(CUDA_VERSION) && CUDA_VERSION >= 11020) + static_assert(y3.imag() == scalar_t(1), ""); +#endif + + constexpr c10::complex z2 = t(scalar_t(1), scalar_t(-2), 1.0_if); + static_assert(z2.real() == scalar_t(2), ""); + static_assert(z2.imag() == scalar_t(1), ""); + constexpr c10::complex z3 = t(scalar_t(1), scalar_t(-2), 1.0_id); + static_assert(z3.real() == scalar_t(2), ""); + static_assert(z3.imag() == scalar_t(1), ""); + + constexpr c10::complex t2 = d(scalar_t(-1), scalar_t(2), 1.0_if); + static_assert(t2.real() == scalar_t(2), ""); + static_assert(t2.imag() == scalar_t(1), ""); + constexpr c10::complex t3 = d(scalar_t(-1), scalar_t(2), 1.0_id); + static_assert(t3.real() == scalar_t(2), ""); + static_assert(t3.imag() == scalar_t(1), ""); +} + +MAYBE_GLOBAL void test_arithmetic_assign() { + test_arithmetic_assign_scalar(); + test_arithmetic_assign_scalar(); + test_arithmetic_assign_complex(); + test_arithmetic_assign_complex(); +} + +} // namespace arithmetic_assign + +namespace arithmetic { + +template +C10_HOST_DEVICE void test_arithmetic_() { + static_assert( + c10::complex(1, 2) == +c10::complex(1, 2), ""); + static_assert( + c10::complex(-1, -2) == -c10::complex(1, 2), ""); + + static_assert( + c10::complex(1, 2) + c10::complex(3, 4) == + c10::complex(4, 6), + ""); + static_assert( + c10::complex(1, 2) + scalar_t(3) == + c10::complex(4, 2), + ""); + static_assert( + scalar_t(3) + c10::complex(1, 2) == + c10::complex(4, 2), + ""); + + static_assert( + c10::complex(1, 2) - c10::complex(3, 4) == + c10::complex(-2, -2), + ""); + static_assert( + c10::complex(1, 2) - scalar_t(3) == + c10::complex(-2, 2), + ""); + static_assert( + scalar_t(3) - c10::complex(1, 2) == + c10::complex(2, -2), + ""); + + static_assert( + c10::complex(1, 2) * c10::complex(3, 4) == + c10::complex(-5, 10), + ""); + static_assert( + c10::complex(1, 2) * scalar_t(3) == + c10::complex(3, 6), + ""); + static_assert( + scalar_t(3) * c10::complex(1, 2) == + c10::complex(3, 6), + ""); + + static_assert( + c10::complex(-5, 10) / c10::complex(3, 4) == + c10::complex(1, 2), + ""); + static_assert( + c10::complex(5, 10) / scalar_t(5) == + c10::complex(1, 2), + ""); + static_assert( + scalar_t(25) / c10::complex(3, 4) == + c10::complex(3, -4), + ""); +} + +MAYBE_GLOBAL void test_arithmetic() { + test_arithmetic_(); + test_arithmetic_(); +} + +template +void test_binary_ops_for_int_type_(T real, T img, int_t num) { + c10::complex c(real, img); + ASSERT_EQ(c + num, c10::complex(real + num, img)); + ASSERT_EQ(num + c, c10::complex(num + real, img)); + ASSERT_EQ(c - num, c10::complex(real - num, img)); + 
ASSERT_EQ(num - c, c10::complex(num - real, -img)); + ASSERT_EQ(c * num, c10::complex(real * num, img * num)); + ASSERT_EQ(num * c, c10::complex(num * real, num * img)); + ASSERT_EQ(c / num, c10::complex(real / num, img / num)); + ASSERT_EQ( + num / c, + c10::complex(num * real / std::norm(c), -num * img / std::norm(c))); +} + +template +void test_binary_ops_for_all_int_types_(T real, T img, int8_t i) { + test_binary_ops_for_int_type_(real, img, i); + test_binary_ops_for_int_type_(real, img, i); + test_binary_ops_for_int_type_(real, img, i); + test_binary_ops_for_int_type_(real, img, i); +} + +TEST(TestArithmeticIntScalar, All) { + test_binary_ops_for_all_int_types_(1.0, 0.1, 1); + test_binary_ops_for_all_int_types_(-1.3, -0.2, -2); +} + +} // namespace arithmetic + +namespace equality { + +template +C10_HOST_DEVICE void test_equality_() { + static_assert( + c10::complex(1, 2) == c10::complex(1, 2), ""); + static_assert(c10::complex(1, 0) == scalar_t(1), ""); + static_assert(scalar_t(1) == c10::complex(1, 0), ""); + static_assert( + c10::complex(1, 2) != c10::complex(3, 4), ""); + static_assert(c10::complex(1, 2) != scalar_t(1), ""); + static_assert(scalar_t(1) != c10::complex(1, 2), ""); +} + +MAYBE_GLOBAL void test_equality() { + test_equality_(); + test_equality_(); +} + +} // namespace equality + +namespace io { + +template +void test_io_() { + std::stringstream ss; + c10::complex a(1, 2); + ss << a; + ASSERT_EQ(ss.str(), "(1,2)"); + ss.str("(3,4)"); + ss >> a; + ASSERT_TRUE(a == c10::complex(3, 4)); +} + +TEST(TestIO, All) { + test_io_(); + test_io_(); +} + +} // namespace io + +namespace test_std { + +template +C10_HOST_DEVICE void test_callable_() { + static_assert(std::real(c10::complex(1, 2)) == scalar_t(1), ""); + static_assert(std::imag(c10::complex(1, 2)) == scalar_t(2), ""); + std::abs(c10::complex(1, 2)); + std::arg(c10::complex(1, 2)); + static_assert(std::norm(c10::complex(3, 4)) == scalar_t(25), ""); + static_assert( + std::conj(c10::complex(3, 4)) == c10::complex(3, -4), + ""); + c10::polar(float(1), float(PI / 2)); + c10::polar(double(1), double(PI / 2)); +} + +MAYBE_GLOBAL void test_callable() { + test_callable_(); + test_callable_(); +} + +template +void test_values_() { + ASSERT_EQ(std::abs(c10::complex(3, 4)), scalar_t(5)); + ASSERT_LT(std::abs(std::arg(c10::complex(0, 1)) - PI / 2), 1e-6); + ASSERT_LT( + std::abs( + c10::polar(scalar_t(1), scalar_t(PI / 2)) - + c10::complex(0, 1)), + 1e-6); +} + +TEST(TestStd, BasicFunctions) { + test_values_(); + test_values_(); + // CSQRT edge cases: checks for overflows which are likely to occur + // if square root is computed using polar form + ASSERT_LT( + std::abs(std::sqrt(c10::complex(-1e20, -4988429.2)).real()), 3e-4); + ASSERT_LT( + std::abs(std::sqrt(c10::complex(-1e60, -4988429.2)).real()), + 3e-4); +} + +} // namespace test_std diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/AbortHandler.h b/phivenv/Lib/site-packages/torch/include/c10/util/AbortHandler.h new file mode 100644 index 0000000000000000000000000000000000000000..b883dfac9060ef4002ad569dadeab00d297528ff --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/AbortHandler.h @@ -0,0 +1,83 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace c10 { +class AbortHandlerHelper { + public: + static AbortHandlerHelper& getInstance() { +#ifdef _WIN32 + thread_local +#endif // _WIN32 + static AbortHandlerHelper instance; + return instance; + } + + void set(std::terminate_handler handler) { + 
std::lock_guard<std::mutex> lk(mutex);
+    if (!inited) {
+      prev = std::set_terminate(handler);
+      curr = std::get_terminate();
+      inited = true;
+    }
+  }
+
+  std::terminate_handler getPrev() const {
+    return prev;
+  }
+
+ private:
+  std::terminate_handler prev = nullptr;
+  std::terminate_handler curr = nullptr;
+  bool inited = false;
+  std::mutex mutex;
+  AbortHandlerHelper() = default;
+  ~AbortHandlerHelper() {
+    // Only restore the previous handler if we are still the current one
+    if (inited && curr == std::get_terminate()) {
+      std::set_terminate(prev);
+    }
+  }
+
+ public:
+  AbortHandlerHelper(AbortHandlerHelper const&) = delete;
+  void operator=(AbortHandlerHelper const&) = delete;
+  AbortHandlerHelper(AbortHandlerHelper&&) = delete;
+  void operator=(AbortHandlerHelper&&) = delete;
+};
+
+namespace detail {
+C10_ALWAYS_INLINE void terminate_handler() {
+  std::cout << "Unhandled exception caught in c10/util/AbortHandler.h" << '\n';
+  auto backtrace = get_backtrace();
+  std::cout << backtrace << '\n' << std::flush;
+  auto prev_handler = AbortHandlerHelper::getInstance().getPrev();
+  if (prev_handler) {
+    prev_handler();
+  } else {
+    std::abort();
+  }
+}
+} // namespace detail
+
+C10_ALWAYS_INLINE void set_terminate_handler() {
+  bool use_custom_terminate = false;
+  // On Windows it is enabled by default based on
+  // https://github.com/pytorch/pytorch/pull/50320#issuecomment-763147062
+#ifdef _WIN32
+  use_custom_terminate = true;
+#endif // _WIN32
+  auto result = c10::utils::check_env("TORCH_CUSTOM_TERMINATE");
+  if (result != std::nullopt) {
+    use_custom_terminate = result.value();
+  }
+  if (use_custom_terminate) {
+    AbortHandlerHelper::getInstance().set(detail::terminate_handler);
+  }
+}
+} // namespace c10
diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/AlignOf.h b/phivenv/Lib/site-packages/torch/include/c10/util/AlignOf.h
new file mode 100644
index 0000000000000000000000000000000000000000..3fd15693fb7369d94e9ada29509f5240d8fb061d
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/c10/util/AlignOf.h
@@ -0,0 +1,176 @@
+//===--- AlignOf.h - Portable calculation of type alignment -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AlignedCharArray and AlignedCharArrayUnion classes.
+//
+//===----------------------------------------------------------------------===//
+
+// ATen: modified from llvm::AlignOf
+// replaced LLVM_ALIGNAS with alignas
+
+#pragma once
+
+#include <cstddef>
+
+namespace c10 {
+
+/// \struct AlignedCharArray
+/// \brief Helper for building an aligned character array type.
+///
+/// This template is used to explicitly build up a collection of aligned
+/// character array types. We have to build these up using a macro and explicit
+/// specialization to cope with MSVC (at least till 2015) where only an
+/// integer literal can be used to specify an alignment constraint. Once built
+/// up here, we can then begin to indirect between these using normal C++
+/// template parameters.
+
+// MSVC requires special handling here.
+#ifndef _MSC_VER
+
+template <size_t Alignment, size_t Size>
+struct AlignedCharArray {
+  // NOLINTNEXTLINE(*c-arrays)
+  alignas(Alignment) char buffer[Size];
+};
+
+#else // _MSC_VER
+
+/// \brief Create a type with an aligned char buffer. 
+template +struct AlignedCharArray; + +// We provide special variations of this template for the most common +// alignments because __declspec(align(...)) doesn't actually work when it is +// a member of a by-value function argument in MSVC, even if the alignment +// request is something reasonably like 8-byte or 16-byte. Note that we can't +// even include the declspec with the union that forces the alignment because +// MSVC warns on the existence of the declspec despite the union member forcing +// proper alignment. + +template +struct AlignedCharArray<1, Size> { + union { + char aligned; + char buffer[Size]; + }; +}; + +template +struct AlignedCharArray<2, Size> { + union { + short aligned; + char buffer[Size]; + }; +}; + +template +struct AlignedCharArray<4, Size> { + union { + int aligned; + char buffer[Size]; + }; +}; + +template +struct AlignedCharArray<8, Size> { + union { + double aligned; + char buffer[Size]; + }; +}; + +// The rest of these are provided with a __declspec(align(...)) and we simply +// can't pass them by-value as function arguments on MSVC. + +#define AT_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(x) \ + template \ + struct AlignedCharArray { \ + __declspec(align(x)) char buffer[Size]; \ + }; + +AT_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(16) +AT_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(32) +AT_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(64) +AT_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(128) + +#undef AT_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT + +#endif // _MSC_VER + +namespace detail { +template < + typename T1, + typename T2 = char, + typename T3 = char, + typename T4 = char, + typename T5 = char, + typename T6 = char, + typename T7 = char, + typename T8 = char, + typename T9 = char, + typename T10 = char> +class AlignerImpl { + T1 t1; + T2 t2; + T3 t3; + T4 t4; + T5 t5; + T6 t6; + T7 t7; + T8 t8; + T9 t9; + T10 t10; + + public: + AlignerImpl() = delete; +}; + +template < + typename T1, + typename T2 = char, + typename T3 = char, + typename T4 = char, + typename T5 = char, + typename T6 = char, + typename T7 = char, + typename T8 = char, + typename T9 = char, + typename T10 = char> +union SizerImpl { + // NOLINTNEXTLINE(*c-arrays) + char arr1[sizeof(T1)], arr2[sizeof(T2)], arr3[sizeof(T3)], arr4[sizeof(T4)], + arr5[sizeof(T5)], arr6[sizeof(T6)], arr7[sizeof(T7)], arr8[sizeof(T8)], + arr9[sizeof(T9)], arr10[sizeof(T10)]; +}; +} // end namespace detail + +/// \brief This union template exposes a suitably aligned and sized character +/// array member which can hold elements of any of up to ten types. +/// +/// These types may be arrays, structs, or any other types. The goal is to +/// expose a char array buffer member which can be used as suitable storage for +/// a placement new of any of these types. Support for more than ten types can +/// be added at the cost of more boilerplate. 
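+// For instance (an illustrative use; the member types are arbitrary):
+//
+//   #include <new>
+//
+//   c10::AlignedCharArrayUnion<int, double> storage;
+//   // `buffer` is sized and aligned for the strictest of the listed types,
+//   // so it is valid backing storage for a placement-new of either one.
+//   double* d = new (storage.buffer) double(0.5);
+//   *d += 1.0; // trivially destructible, so no explicit destructor call
+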
+template < + typename T1, + typename T2 = char, + typename T3 = char, + typename T4 = char, + typename T5 = char, + typename T6 = char, + typename T7 = char, + typename T8 = char, + typename T9 = char, + typename T10 = char> +struct AlignedCharArrayUnion + : AlignedCharArray< + alignof(detail::AlignerImpl), + sizeof(::c10::detail:: + SizerImpl)> {}; +} // end namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/ApproximateClock.h b/phivenv/Lib/site-packages/torch/include/c10/util/ApproximateClock.h new file mode 100644 index 0000000000000000000000000000000000000000..74a0975370e80a00b628680b7d0c8c1a796ba30c --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/ApproximateClock.h @@ -0,0 +1,115 @@ +// Copyright 2023-present Facebook. All Rights Reserved. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(C10_IOS) && defined(C10_MOBILE) +#include // for gettimeofday() +#endif + +#if defined(__i386__) || defined(__x86_64__) || defined(__amd64__) +#define C10_RDTSC +#if defined(_MSC_VER) +#include +#elif defined(__CUDACC__) || defined(__HIPCC__) +#undef C10_RDTSC +#elif defined(__clang__) +// `__rdtsc` is available by default. +// NB: This has to be first, because Clang will also define `__GNUC__` +#elif defined(__GNUC__) +#include +#else +#undef C10_RDTSC +#endif +#endif + +namespace c10 { + +using time_t = int64_t; +using steady_clock_t = std::conditional_t< + std::chrono::high_resolution_clock::is_steady, + std::chrono::high_resolution_clock, + std::chrono::steady_clock>; + +inline time_t getTimeSinceEpoch() { + auto now = std::chrono::system_clock::now().time_since_epoch(); + return std::chrono::duration_cast(now).count(); +} + +inline time_t getTime(bool allow_monotonic = false) { +#if defined(C10_IOS) && defined(C10_MOBILE) + // clock_gettime is only available on iOS 10.0 or newer. Unlike OS X, iOS + // can't rely on CLOCK_REALTIME, as it is defined no matter if clock_gettime + // is implemented or not + struct timeval now; + gettimeofday(&now, NULL); + return static_cast(now.tv_sec) * 1000000000 + + static_cast(now.tv_usec) * 1000; +#elif defined(_WIN32) || defined(__MACH__) + return std::chrono::duration_cast( + steady_clock_t::now().time_since_epoch()) + .count(); +#else + // clock_gettime is *much* faster than std::chrono implementation on Linux + struct timespec t{}; + auto mode = CLOCK_REALTIME; + if (allow_monotonic) { + mode = CLOCK_MONOTONIC; + } + clock_gettime(mode, &t); + return static_cast(t.tv_sec) * 1000000000 + + static_cast(t.tv_nsec); +#endif +} + +// We often do not need to capture true wall times. If a fast mechanism such +// as TSC is available we can use that instead and convert back to epoch time +// during post processing. This greatly reduce the clock's contribution to +// profiling. +// http://btorpey.github.io/blog/2014/02/18/clock-sources-in-linux/ +// https://quick-bench.com/q/r8opkkGZSJMu9wM_XTbDouq-0Io +// TODO: We should use +// `https://github.com/google/benchmark/blob/main/src/cycleclock.h` +inline auto getApproximateTime() { +#if defined(C10_RDTSC) + return static_cast(__rdtsc()); +#else + return getTime(); +#endif +} + +using approx_time_t = decltype(getApproximateTime()); +static_assert( + std::is_same_v || + std::is_same_v, + "Expected either int64_t (`getTime`) or uint64_t (some TSC reads)."); + +// Convert `getCount` results to Nanoseconds since unix epoch. 
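+// Concretely, the conversion is affine: given two measured pairs (t0, a0)
+// and (t1, a1), unix(a) ~= t0 + (a - a0) * (t1 - t0) / (a1 - a0). A
+// two-point toy version built on the pair type declared just below (the
+// real converter samples `replicates` pairs; this is only a sketch):
+//
+//   std::function<time_t(approx_time_t)> two_point_converter(
+//       UnixAndApproximateTimePair p0, UnixAndApproximateTimePair p1) {
+//     const double scale =
+//         double(p1.t_ - p0.t_) / double(p1.approx_t_ - p0.approx_t_);
+//     return [p0, scale](approx_time_t a) {
+//       return p0.t_ + static_cast<time_t>(double(a - p0.approx_t_) * scale);
+//     };
+//   }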
+class C10_API ApproximateClockToUnixTimeConverter final { + public: + ApproximateClockToUnixTimeConverter(); + std::function makeConverter(); + + struct UnixAndApproximateTimePair { + time_t t_; + approx_time_t approx_t_; + }; + static UnixAndApproximateTimePair measurePair(); + + private: + static constexpr size_t replicates = 1001; + using time_pairs = std::array; + time_pairs measurePairs(); + + time_pairs start_times_; +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Array.h b/phivenv/Lib/site-packages/torch/include/c10/util/Array.h new file mode 100644 index 0000000000000000000000000000000000000000..ce317b87acda26f2bf07c58c06700c4f7662f26e --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Array.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include + +namespace c10 { + +// This helper function creates a constexpr std::array +// From a compile time list of values, without requiring you to explicitly +// write out the length. +// +// See also https://stackoverflow.com/a/26351760/23845 +template +inline constexpr auto array_of(T&&... t) -> std::array { + return {{std::forward(t)...}}; +} + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/ArrayRef.h b/phivenv/Lib/site-packages/torch/include/c10/util/ArrayRef.h new file mode 100644 index 0000000000000000000000000000000000000000..e11098c103c19ec332d820841da521c3d60cb203 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/ArrayRef.h @@ -0,0 +1,383 @@ +//===--- ArrayRef.h - Array Reference Wrapper -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// ATen: modified from llvm::ArrayRef. +// removed llvm-specific functionality +// removed some implicit const -> non-const conversions that rely on +// complicated std::enable_if meta-programming +// removed a bunch of slice variants for simplicity... + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace c10 { +/// ArrayRef - Represent a constant reference to an array (0 or more elements +/// consecutively in memory), i.e. a start pointer and a length. It allows +/// various APIs to take consecutive elements easily and conveniently. +/// +/// This class does not own the underlying data, it is expected to be used in +/// situations where the data resides in some other buffer, whose lifetime +/// extends past that of the ArrayRef. For this reason, it is not in general +/// safe to store an ArrayRef. +/// +/// This is intended to be trivially copyable, so it should be passed by +/// value. +template +class ArrayRef final { + public: + using iterator = const T*; + using const_iterator = const T*; + using size_type = size_t; + using value_type = T; + + using reverse_iterator = std::reverse_iterator; + + private: + /// The start of the array, in an external buffer. + const T* Data; + + /// The number of elements. + size_type Length; + + void debugCheckNullptrInvariant() { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + Data != nullptr || Length == 0, + "created ArrayRef with nullptr and non-zero length! std::optional relies on this being illegal"); + } + + public: + /// @name Constructors + /// @{ + + /// Construct an empty ArrayRef. 
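+// The ownership caveat above in practice (illustrative):
+//
+//   std::vector<int> make_v() { return {1, 2, 3}; }
+//
+//   c10::ArrayRef<int> bad = make_v();  // dangles: the temporary vector is
+//                                       // destroyed at the end of this line
+//   std::vector<int> v = make_v();
+//   c10::ArrayRef<int> good(v);         // fine for as long as `v` is alive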
+ /* implicit */ constexpr ArrayRef() : Data(nullptr), Length(0) {} + + /// Construct an ArrayRef from a single element. + // TODO Make this explicit + constexpr ArrayRef(const T& OneElt) : Data(&OneElt), Length(1) {} + + /// Construct an ArrayRef from a pointer and length. + constexpr ArrayRef(const T* data, size_t length) + : Data(data), Length(length) { + debugCheckNullptrInvariant(); + } + + /// Construct an ArrayRef from a range. + constexpr ArrayRef(const T* begin, const T* end) + : Data(begin), Length(end - begin) { + debugCheckNullptrInvariant(); + } + + /// Construct an ArrayRef from a SmallVector. This is templated in order to + /// avoid instantiating SmallVectorTemplateCommon whenever we + /// copy-construct an ArrayRef. + template + /* implicit */ ArrayRef(const SmallVectorTemplateCommon& Vec) + : Data(Vec.data()), Length(Vec.size()) { + debugCheckNullptrInvariant(); + } + + template < + typename Container, + typename U = decltype(std::declval().data()), + typename = std::enable_if_t< + (std::is_same_v || std::is_same_v)>> + /* implicit */ ArrayRef(const Container& container) + : Data(container.data()), Length(container.size()) { + debugCheckNullptrInvariant(); + } + + /// Construct an ArrayRef from a std::vector. + // The enable_if stuff here makes sure that this isn't used for + // std::vector, because ArrayRef can't work on a std::vector + // bitfield. + template + /* implicit */ ArrayRef(const std::vector& Vec) + : Data(Vec.data()), Length(Vec.size()) { + static_assert( + !std::is_same_v, + "ArrayRef cannot be constructed from a std::vector bitfield."); + } + + /// Construct an ArrayRef from a std::array + template + /* implicit */ constexpr ArrayRef(const std::array& Arr) + : Data(Arr.data()), Length(N) {} + + /// Construct an ArrayRef from a C array. + template + // NOLINTNEXTLINE(*c-arrays*) + /* implicit */ constexpr ArrayRef(const T (&Arr)[N]) : Data(Arr), Length(N) {} + + /// Construct an ArrayRef from a std::initializer_list. + /* implicit */ constexpr ArrayRef(const std::initializer_list& Vec) + : Data( + std::begin(Vec) == std::end(Vec) ? static_cast(nullptr) + : std::begin(Vec)), + Length(Vec.size()) {} + + /// @} + /// @name Simple Operations + /// @{ + + constexpr iterator begin() const { + return Data; + } + constexpr iterator end() const { + return Data + Length; + } + + // These are actually the same as iterator, since ArrayRef only + // gives you const iterators. + constexpr const_iterator cbegin() const { + return Data; + } + constexpr const_iterator cend() const { + return Data + Length; + } + + constexpr reverse_iterator rbegin() const { + return reverse_iterator(end()); + } + constexpr reverse_iterator rend() const { + return reverse_iterator(begin()); + } + + /// Check if all elements in the array satisfy the given expression + constexpr bool allMatch(const std::function& pred) const { + return std::all_of(cbegin(), cend(), pred); + } + + /// empty - Check if the array is empty. + constexpr bool empty() const { + return Length == 0; + } + + constexpr const T* data() const { + return Data; + } + + /// size - Get the array size. + constexpr size_t size() const { + return Length; + } + + /// front - Get the first element. + constexpr const T& front() const { + TORCH_CHECK( + !empty(), "ArrayRef: attempted to access front() of empty list"); + return Data[0]; + } + + /// back - Get the last element. 
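+// Note that front() (and back()/at() below) are TORCH_CHECK-guarded, while
+// operator[] is not, so misuse of the former raises a recoverable error
+// instead of reading out of bounds (illustrative):
+//
+//   c10::ArrayRef<int> empty_ref;
+//   // empty_ref.front(); // would fail: "attempted to access front() of empty list"
+//   if (!empty_ref.empty()) {
+//     int first = empty_ref.front(); // the safe pattern
+//   }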
+ constexpr const T& back() const { + TORCH_CHECK(!empty(), "ArrayRef: attempted to access back() of empty list"); + return Data[Length - 1]; + } + + /// equals - Check for element-wise equality. + constexpr bool equals(ArrayRef RHS) const { + return Length == RHS.Length && std::equal(begin(), end(), RHS.begin()); + } + + /// slice(n, m) - Take M elements of the array starting at element N + constexpr ArrayRef slice(size_t N, size_t M) const { + TORCH_CHECK( + N + M <= size(), + "ArrayRef: invalid slice, N = ", + N, + "; M = ", + M, + "; size = ", + size()); + return ArrayRef(data() + N, M); + } + + /// slice(n) - Chop off the first N elements of the array. + constexpr ArrayRef slice(size_t N) const { + TORCH_CHECK( + N <= size(), "ArrayRef: invalid slice, N = ", N, "; size = ", size()); + return slice(N, size() - N); + } + + /// @} + /// @name Operator Overloads + /// @{ + constexpr const T& operator[](size_t Index) const { + return Data[Index]; + } + + /// Vector compatibility + constexpr const T& at(size_t Index) const { + TORCH_CHECK( + Index < Length, + "ArrayRef: invalid index Index = ", + Index, + "; Length = ", + Length); + return Data[Index]; + } + + /// Disallow accidental assignment from a temporary. + /// + /// The declaration here is extra complicated so that "arrayRef = {}" + /// continues to select the move assignment operator. + template + std::enable_if_t, ArrayRef>& operator=( + // NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward) + U&& Temporary) = delete; + + /// Disallow accidental assignment from a temporary. + /// + /// The declaration here is extra complicated so that "arrayRef = {}" + /// continues to select the move assignment operator. + template + std::enable_if_t, ArrayRef>& operator=( + std::initializer_list) = delete; + + /// @} + /// @name Expensive Operations + /// @{ + std::vector vec() const { + return std::vector(Data, Data + Length); + } + + /// @} +}; + +template +std::ostream& operator<<(std::ostream& out, ArrayRef list) { + int i = 0; + out << "["; + for (const auto& e : list) { + if (i++ > 0) + out << ", "; + out << e; + } + out << "]"; + return out; +} + +/// @name ArrayRef Convenience constructors +/// @{ + +/// Construct an ArrayRef from a single element. +template +ArrayRef makeArrayRef(const T& OneElt) { + return OneElt; +} + +/// Construct an ArrayRef from a pointer and length. +template +ArrayRef makeArrayRef(const T* data, size_t length) { + return ArrayRef(data, length); +} + +/// Construct an ArrayRef from a range. +template +ArrayRef makeArrayRef(const T* begin, const T* end) { + return ArrayRef(begin, end); +} + +/// Construct an ArrayRef from a SmallVector. +template +ArrayRef makeArrayRef(const SmallVectorImpl& Vec) { + return Vec; +} + +/// Construct an ArrayRef from a SmallVector. +template +ArrayRef makeArrayRef(const SmallVector& Vec) { + return Vec; +} + +/// Construct an ArrayRef from a std::vector. +template +ArrayRef makeArrayRef(const std::vector& Vec) { + return Vec; +} + +/// Construct an ArrayRef from a std::array. +template +ArrayRef makeArrayRef(const std::array& Arr) { + return Arr; +} + +/// Construct an ArrayRef from an ArrayRef (no-op) (const) +template +ArrayRef makeArrayRef(const ArrayRef& Vec) { + return Vec; +} + +/// Construct an ArrayRef from an ArrayRef (no-op) +template +ArrayRef& makeArrayRef(ArrayRef& Vec) { + return Vec; +} + +/// Construct an ArrayRef from a C array. 
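+// Usage sketch (illustrative only; `raw` and `view` are hypothetical names):
+//
+//   int raw[3] = {1, 2, 3};
+//   c10::ArrayRef<int> view = c10::makeArrayRef(raw); // non-owning view
+//   bool ok = view.slice(1).equals({2, 3});           // true
+//   // `view` must not outlive `raw`: ArrayRef never owns its storage.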
+template +// NOLINTNEXTLINE(*c-arrays*) +ArrayRef makeArrayRef(const T (&Arr)[N]) { + return ArrayRef(Arr); +} + +// WARNING: Template instantiation will NOT be willing to do an implicit +// conversions to get you to an c10::ArrayRef, which is why we need so +// many overloads. + +template +bool operator==(c10::ArrayRef a1, c10::ArrayRef a2) { + return a1.equals(a2); +} + +template +bool operator!=(c10::ArrayRef a1, c10::ArrayRef a2) { + return !a1.equals(a2); +} + +template +bool operator==(const std::vector& a1, c10::ArrayRef a2) { + return c10::ArrayRef(a1).equals(a2); +} + +template +bool operator!=(const std::vector& a1, c10::ArrayRef a2) { + return !c10::ArrayRef(a1).equals(a2); +} + +template +bool operator==(c10::ArrayRef a1, const std::vector& a2) { + return a1.equals(c10::ArrayRef(a2)); +} + +template +bool operator!=(c10::ArrayRef a1, const std::vector& a2) { + return !a1.equals(c10::ArrayRef(a2)); +} + +using IntArrayRef = ArrayRef; + +using IntList [[deprecated( + "This alias is deprecated because it doesn't make ownership semantics obvious. Use IntArrayRef instead!")]] = + ArrayRef; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/BFloat16-inl.h b/phivenv/Lib/site-packages/torch/include/c10/util/BFloat16-inl.h new file mode 100644 index 0000000000000000000000000000000000000000..d8b8d9cca8d0eb4cd087b013c99ecc78f84c3080 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/BFloat16-inl.h @@ -0,0 +1,340 @@ +#pragma once + +#include +#include + +#include + +C10_CLANG_DIAGNOSTIC_PUSH() +#if C10_CLANG_HAS_WARNING("-Wimplicit-int-float-conversion") +C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion") +#endif + +#if defined(CL_SYCL_LANGUAGE_VERSION) +#include // for SYCL 1.2.1 +#elif defined(SYCL_LANGUAGE_VERSION) +#include // for SYCL 2020 +#endif + +namespace c10 { + +/// Constructors +inline C10_HOST_DEVICE BFloat16::BFloat16(float value) + : +#if defined(__CUDACC__) && !defined(USE_ROCM) && defined(__CUDA_ARCH__) && \ + __CUDA_ARCH__ >= 800 + x(__bfloat16_as_ushort(__float2bfloat16(value))) +#elif defined(__SYCL_DEVICE_ONLY__) && \ + defined(SYCL_EXT_ONEAPI_BFLOAT16_MATH_FUNCTIONS) + x(c10::bit_cast(sycl::ext::oneapi::bfloat16(value))) +#else + // RNE by default + x(detail::round_to_nearest_even(value)) +#endif +{ +} + +/// Implicit conversions +inline C10_HOST_DEVICE BFloat16::operator float() const { +#if defined(__CUDACC__) && !defined(USE_ROCM) + return __bfloat162float(*reinterpret_cast(&x)); +#elif defined(__SYCL_DEVICE_ONLY__) && \ + defined(SYCL_EXT_ONEAPI_BFLOAT16_MATH_FUNCTIONS) + return float(*reinterpret_cast(&x)); +#else + return detail::f32_from_bits(x); +#endif +} + +#if defined(__CUDACC__) && !defined(USE_ROCM) +inline C10_HOST_DEVICE BFloat16::BFloat16(const __nv_bfloat16& value) { + x = *reinterpret_cast(&value); +} +inline C10_HOST_DEVICE BFloat16::operator __nv_bfloat16() const { + return *reinterpret_cast(&x); +} +#endif + +#if defined(SYCL_EXT_ONEAPI_BFLOAT16_MATH_FUNCTIONS) +inline C10_HOST_DEVICE BFloat16::BFloat16( + const sycl::ext::oneapi::bfloat16& value) { + x = *reinterpret_cast(&value); +} +inline C10_HOST_DEVICE BFloat16::operator sycl::ext::oneapi::bfloat16() const { + return *reinterpret_cast(&x); +} +#endif + +// CUDA intrinsics + +#if defined(__CUDACC__) || defined(__HIPCC__) +inline C10_DEVICE BFloat16 __ldg(const BFloat16* ptr) { +#if !defined(USE_ROCM) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 + return __ldg(reinterpret_cast(ptr)); +#else + return *ptr; +#endif +} +#endif 
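+
+// Conversion sketch (illustrative): constructing a BFloat16 from a float
+// keeps the top 16 bits of the IEEE-754 representation, rounded to nearest
+// even, so values that differ only below bfloat16 precision (eps == 2^-8)
+// collapse to the same number:
+//
+//   c10::BFloat16 b(1.0f + 1e-4f);   // rounds to exactly 1.0
+//   float f = static_cast<float>(b); // f == 1.0f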
+ +/// Arithmetic + +inline C10_HOST_DEVICE BFloat16 +operator+(const BFloat16& a, const BFloat16& b) { + return static_cast(a) + static_cast(b); +} + +inline C10_HOST_DEVICE BFloat16 +operator-(const BFloat16& a, const BFloat16& b) { + return static_cast(a) - static_cast(b); +} + +inline C10_HOST_DEVICE BFloat16 +operator*(const BFloat16& a, const BFloat16& b) { + return static_cast(a) * static_cast(b); +} + +inline C10_HOST_DEVICE BFloat16 operator/(const BFloat16& a, const BFloat16& b) + __ubsan_ignore_float_divide_by_zero__ { + return static_cast(a) / static_cast(b); +} + +inline C10_HOST_DEVICE BFloat16 operator-(const BFloat16& a) { + return -static_cast(a); +} + +inline C10_HOST_DEVICE BFloat16& operator+=(BFloat16& a, const BFloat16& b) { + a = a + b; + return a; +} + +inline C10_HOST_DEVICE BFloat16& operator-=(BFloat16& a, const BFloat16& b) { + a = a - b; + return a; +} + +inline C10_HOST_DEVICE BFloat16& operator*=(BFloat16& a, const BFloat16& b) { + a = a * b; + return a; +} + +inline C10_HOST_DEVICE BFloat16& operator/=(BFloat16& a, const BFloat16& b) { + a = a / b; + return a; +} + +inline C10_HOST_DEVICE BFloat16& operator|(BFloat16& a, const BFloat16& b) { + a.x = a.x | b.x; + return a; +} + +inline C10_HOST_DEVICE BFloat16& operator^(BFloat16& a, const BFloat16& b) { + a.x = a.x ^ b.x; + return a; +} + +inline C10_HOST_DEVICE BFloat16& operator&(BFloat16& a, const BFloat16& b) { + a.x = a.x & b.x; + return a; +} + +/// Arithmetic with floats + +inline C10_HOST_DEVICE float operator+(BFloat16 a, float b) { + return static_cast(a) + b; +} +inline C10_HOST_DEVICE float operator-(BFloat16 a, float b) { + return static_cast(a) - b; +} +inline C10_HOST_DEVICE float operator*(BFloat16 a, float b) { + return static_cast(a) * b; +} +inline C10_HOST_DEVICE float operator/(BFloat16 a, float b) { + return static_cast(a) / b; +} + +inline C10_HOST_DEVICE float operator+(float a, BFloat16 b) { + return a + static_cast(b); +} +inline C10_HOST_DEVICE float operator-(float a, BFloat16 b) { + return a - static_cast(b); +} +inline C10_HOST_DEVICE float operator*(float a, BFloat16 b) { + return a * static_cast(b); +} +inline C10_HOST_DEVICE float operator/(float a, BFloat16 b) { + return a / static_cast(b); +} + +inline C10_HOST_DEVICE float& operator+=(float& a, const BFloat16& b) { + return a += static_cast(b); +} +inline C10_HOST_DEVICE float& operator-=(float& a, const BFloat16& b) { + return a -= static_cast(b); +} +inline C10_HOST_DEVICE float& operator*=(float& a, const BFloat16& b) { + return a *= static_cast(b); +} +inline C10_HOST_DEVICE float& operator/=(float& a, const BFloat16& b) { + return a /= static_cast(b); +} + +/// Arithmetic with doubles + +inline C10_HOST_DEVICE double operator+(BFloat16 a, double b) { + return static_cast(a) + b; +} +inline C10_HOST_DEVICE double operator-(BFloat16 a, double b) { + return static_cast(a) - b; +} +inline C10_HOST_DEVICE double operator*(BFloat16 a, double b) { + return static_cast(a) * b; +} +inline C10_HOST_DEVICE double operator/(BFloat16 a, double b) { + return static_cast(a) / b; +} + +inline C10_HOST_DEVICE double operator+(double a, BFloat16 b) { + return a + static_cast(b); +} +inline C10_HOST_DEVICE double operator-(double a, BFloat16 b) { + return a - static_cast(b); +} +inline C10_HOST_DEVICE double operator*(double a, BFloat16 b) { + return a * static_cast(b); +} +inline C10_HOST_DEVICE double operator/(double a, BFloat16 b) { + return a / static_cast(b); +} + +/// Arithmetic with ints + +inline C10_HOST_DEVICE BFloat16 
operator+(BFloat16 a, int b) { + return a + static_cast(b); +} +inline C10_HOST_DEVICE BFloat16 operator-(BFloat16 a, int b) { + return a - static_cast(b); +} +inline C10_HOST_DEVICE BFloat16 operator*(BFloat16 a, int b) { + return a * static_cast(b); +} +inline C10_HOST_DEVICE BFloat16 operator/(BFloat16 a, int b) { + return a / static_cast(b); +} + +inline C10_HOST_DEVICE BFloat16 operator+(int a, BFloat16 b) { + return static_cast(a) + b; +} +inline C10_HOST_DEVICE BFloat16 operator-(int a, BFloat16 b) { + return static_cast(a) - b; +} +inline C10_HOST_DEVICE BFloat16 operator*(int a, BFloat16 b) { + return static_cast(a) * b; +} +inline C10_HOST_DEVICE BFloat16 operator/(int a, BFloat16 b) { + return static_cast(a) / b; +} + +//// Arithmetic with int64_t + +inline C10_HOST_DEVICE BFloat16 operator+(BFloat16 a, int64_t b) { + return a + static_cast(b); +} +inline C10_HOST_DEVICE BFloat16 operator-(BFloat16 a, int64_t b) { + return a - static_cast(b); +} +inline C10_HOST_DEVICE BFloat16 operator*(BFloat16 a, int64_t b) { + return a * static_cast(b); +} +inline C10_HOST_DEVICE BFloat16 operator/(BFloat16 a, int64_t b) { + return a / static_cast(b); +} + +inline C10_HOST_DEVICE BFloat16 operator+(int64_t a, BFloat16 b) { + return static_cast(a) + b; +} +inline C10_HOST_DEVICE BFloat16 operator-(int64_t a, BFloat16 b) { + return static_cast(a) - b; +} +inline C10_HOST_DEVICE BFloat16 operator*(int64_t a, BFloat16 b) { + return static_cast(a) * b; +} +inline C10_HOST_DEVICE BFloat16 operator/(int64_t a, BFloat16 b) { + return static_cast(a) / b; +} + +// Overloading < and > operators, because std::max and std::min use them. + +inline C10_HOST_DEVICE bool operator>(BFloat16& lhs, BFloat16& rhs) { + return float(lhs) > float(rhs); +} + +inline C10_HOST_DEVICE bool operator<(BFloat16& lhs, BFloat16& rhs) { + return float(lhs) < float(rhs); +} + +} // namespace c10 + +namespace std { + +template <> +class numeric_limits { + public: + static constexpr bool is_signed = true; + static constexpr bool is_specialized = true; + static constexpr bool is_integer = false; + static constexpr bool is_exact = false; + static constexpr bool has_infinity = true; + static constexpr bool has_quiet_NaN = true; + static constexpr bool has_signaling_NaN = true; + static constexpr auto has_denorm = numeric_limits::has_denorm; + static constexpr auto has_denorm_loss = + numeric_limits::has_denorm_loss; + static constexpr auto round_style = numeric_limits::round_style; + static constexpr bool is_iec559 = false; + static constexpr bool is_bounded = true; + static constexpr bool is_modulo = false; + static constexpr int digits = 8; + static constexpr int digits10 = 2; + static constexpr int max_digits10 = 4; + static constexpr int radix = 2; + static constexpr int min_exponent = -125; + static constexpr int min_exponent10 = -37; + static constexpr int max_exponent = 128; + static constexpr int max_exponent10 = 38; + static constexpr auto traps = numeric_limits::traps; + static constexpr auto tinyness_before = + numeric_limits::tinyness_before; + + static constexpr c10::BFloat16 min() { + return c10::BFloat16(0x0080, c10::BFloat16::from_bits()); + } + static constexpr c10::BFloat16 lowest() { + return c10::BFloat16(0xFF7F, c10::BFloat16::from_bits()); + } + static constexpr c10::BFloat16 max() { + return c10::BFloat16(0x7F7F, c10::BFloat16::from_bits()); + } + static constexpr c10::BFloat16 epsilon() { + return c10::BFloat16(0x3C00, c10::BFloat16::from_bits()); + } + static constexpr c10::BFloat16 round_error() { + 
return c10::BFloat16(0x3F00, c10::BFloat16::from_bits()); + } + static constexpr c10::BFloat16 infinity() { + return c10::BFloat16(0x7F80, c10::BFloat16::from_bits()); + } + static constexpr c10::BFloat16 quiet_NaN() { + return c10::BFloat16(0x7FC0, c10::BFloat16::from_bits()); + } + static constexpr c10::BFloat16 signaling_NaN() { + return c10::BFloat16(0x7F80, c10::BFloat16::from_bits()); + } + static constexpr c10::BFloat16 denorm_min() { + return c10::BFloat16(0x0001, c10::BFloat16::from_bits()); + } +}; + +} // namespace std + +C10_CLANG_DIAGNOSTIC_POP() diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/BFloat16-math.h b/phivenv/Lib/site-packages/torch/include/c10/util/BFloat16-math.h new file mode 100644 index 0000000000000000000000000000000000000000..f06b04b9c9d6974f0dbeb2fc32649aa427d33952 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/BFloat16-math.h @@ -0,0 +1,299 @@ +#pragma once + +#include +#include + +C10_CLANG_DIAGNOSTIC_PUSH() +#if C10_CLANG_HAS_WARNING("-Wimplicit-float-conversion") +C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-float-conversion") +#endif + +namespace c10 { +template +struct is_reduced_floating_point + : std::integral_constant< + bool, + std::is_same_v || std::is_same_v> {}; + +template +constexpr bool is_reduced_floating_point_v = + is_reduced_floating_point::value; +} // namespace c10 + +namespace std { + +#if !defined(FBCODE_CAFFE2) && !defined(C10_NODEPRECATED) +using c10::is_reduced_floating_point; +using c10::is_reduced_floating_point_v; +#endif // !defined(FBCODE_CAFFE2) && !defined(C10_NODEPRECATED) + +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T acos(T a) { + return std::acos(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T asin(T a) { + return std::asin(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T atan(T a) { + return std::atan(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T atanh(T a) { + return std::atanh(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T erf(T a) { + return std::erf(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T erfc(T a) { + return std::erfc(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T exp(T a) { + return std::exp(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T expm1(T a) { + return std::expm1(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline bool isfinite(T a) { + return std::isfinite(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T log(T a) { + return std::log(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T log10(T a) { + return std::log10(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T log1p(T a) { + return std::log1p(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T log2(T a) { + return std::log2(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T ceil(T a) { + return std::ceil(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T cos(T a) { + return std::cos(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T floor(T a) { + return std::floor(float(a)); +} +template < + typename 
T, + typename std::enable_if_t, int> = 0> +inline T nearbyint(T a) { + return std::nearbyint(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T sin(T a) { + return std::sin(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T tan(T a) { + return std::tan(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T sinh(T a) { + return std::sinh(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T cosh(T a) { + return std::cosh(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T tanh(T a) { + return std::tanh(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T trunc(T a) { + return std::trunc(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T lgamma(T a) { + return std::lgamma(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T sqrt(T a) { + return std::sqrt(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T rsqrt(T a) { + return 1.0 / std::sqrt(float(a)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T abs(T a) { + return std::abs(float(a)); +} +#if defined(_MSC_VER) && defined(__CUDACC__) +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T pow(T a, double b) { + return std::pow(float(a), float(b)); +} +#else +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T pow(T a, double b) { + return std::pow(float(a), b); +} +#endif +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T pow(T a, T b) { + return std::pow(float(a), float(b)); +} +template < + typename T, + typename std::enable_if_t, int> = 0> +inline T fmod(T a, T b) { + return std::fmod(float(a), float(b)); +} + +/* + The following function is inspired from the implementation in `musl` + Link to License: https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT + ---------------------------------------------------------------------- + Copyright © 2005-2020 Rich Felker, et al. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+  ----------------------------------------------------------------------
+ */
+template <
+    typename T,
+    typename std::enable_if_t<is_reduced_floating_point_v<T>, int> = 0>
+C10_HOST_DEVICE inline T nextafter(T from, T to) {
+  // Reference:
+  // https://git.musl-libc.org/cgit/musl/tree/src/math/nextafter.c
+  using int_repr_t = uint16_t;
+  constexpr uint8_t bits = 16;
+  union {
+    T f;
+    int_repr_t i;
+  } ufrom = {from}, uto = {to};
+
+  // get a mask for the sign bit, i.e. the MSB
+  int_repr_t sign_mask = int_repr_t{1} << (bits - 1);
+
+  // short-circuit: if either is NaN, return NaN
+  if (from != from || to != to) {
+    return from + to;
+  }
+
+  // short-circuit: if they are exactly the same.
+  if (ufrom.i == uto.i) {
+    return from;
+  }
+
+  // mask the sign bit to zero, i.e. make the value positive;
+  // equivalent to abs(x)
+  int_repr_t abs_from = ufrom.i & ~sign_mask;
+  int_repr_t abs_to = uto.i & ~sign_mask;
+  if (abs_from == 0) {
+    // if both are zero but with different sign,
+    // preserve the sign of `to`.
+    if (abs_to == 0) {
+      return to;
+    }
+    // smallest subnormal with sign of `to`.
+    ufrom.i = (uto.i & sign_mask) | int_repr_t{1};
+    return ufrom.f;
+  }
+
+  // if abs(from) > abs(to) or sign(from) != sign(to)
+  if (abs_from > abs_to || ((ufrom.i ^ uto.i) & sign_mask)) {
+    ufrom.i--;
+  } else {
+    ufrom.i++;
+  }
+
+  return ufrom.f;
+}
+
+} // namespace std
+
+C10_CLANG_DIAGNOSTIC_POP()
diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/BFloat16.h b/phivenv/Lib/site-packages/torch/include/c10/util/BFloat16.h
new file mode 100644
index 0000000000000000000000000000000000000000..48603e83fd84ffd0b6c4ac943cb2be3a9d3457e2
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/c10/util/BFloat16.h
@@ -0,0 +1,123 @@
+#pragma once
+
+// Defines the bfloat16 type (brain floating-point). This representation uses
+// 1 bit for the sign, 8 bits for the exponent and 7 bits for the mantissa.
+
+#include <c10/macros/Macros.h>
+#include <cmath>
+#include <cstdint>
+#include <cstring>
+#include <iosfwd>
+#include <ostream>
+
+#if defined(__CUDACC__) && !defined(USE_ROCM)
+#include <cuda_bf16.h>
+#endif
+
+#if defined(CL_SYCL_LANGUAGE_VERSION)
+#include <CL/sycl.hpp> // for SYCL 1.2.1
+#elif defined(SYCL_LANGUAGE_VERSION)
+#include <sycl/sycl.hpp> // for SYCL 2020
+#endif
+
+namespace c10 {
+
+namespace detail {
+inline C10_HOST_DEVICE float f32_from_bits(uint16_t src) {
+  float res = 0;
+  uint32_t tmp = src;
+  tmp <<= 16;
+
+#if defined(USE_ROCM) && defined(__HIPCC__)
+  float* tempRes;
+
+  // We should be using memcpy in order to respect the strict aliasing rule
+  // but it fails in the HIP environment.
+  tempRes = reinterpret_cast<float*>(&tmp);
+  res = *tempRes;
+#else
+  std::memcpy(&res, &tmp, sizeof(tmp));
+#endif
+
+  return res;
+}
+
+inline C10_HOST_DEVICE uint16_t bits_from_f32(float src) {
+  uint32_t res = 0;
+
+#if defined(USE_ROCM) && defined(__HIPCC__)
+  // We should be using memcpy in order to respect the strict aliasing rule
+  // but it fails in the HIP environment.
+  uint32_t* tempRes = reinterpret_cast<uint32_t*>(&src);
+  res = *tempRes;
+#else
+  std::memcpy(&res, &src, sizeof(res));
+#endif
+
+  return res >> 16;
+}
+
+inline C10_HOST_DEVICE uint16_t round_to_nearest_even(float src) {
+#if defined(USE_ROCM) && defined(__HIPCC__)
+  if (src != src) {
+#elif defined(_MSC_VER)
+  if (isnan(src)) {
+#else
+  if (std::isnan(src)) {
+#endif
+    return UINT16_C(0x7FC0);
+  } else {
+    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
+    union {
+      uint32_t U32; // NOLINT(facebook-hte-BadMemberName)
+      float F32; // NOLINT(facebook-hte-BadMemberName)
+    };
+
+    F32 = src;
+    uint32_t rounding_bias = ((U32 >> 16) & 1) + UINT32_C(0x7FFF);
+    return static_cast<uint16_t>((U32 + rounding_bias) >> 16);
+  }
+}
+} // namespace detail
+
+struct alignas(2) BFloat16 {
+  uint16_t x;
+
+  // HIP wants __host__ __device__ tag, CUDA does not
+#if defined(USE_ROCM) && defined(__HIPCC__)
+  C10_HOST_DEVICE BFloat16() = default;
+#else
+  BFloat16() = default;
+#endif
+
+  struct from_bits_t {};
+  static constexpr C10_HOST_DEVICE from_bits_t from_bits() {
+    return from_bits_t();
+  }
+
+  constexpr C10_HOST_DEVICE BFloat16(unsigned short bits, from_bits_t)
+      : x(bits) {}
+  /* implicit */ inline C10_HOST_DEVICE BFloat16(float value);
+  inline C10_HOST_DEVICE operator float() const;
+
+#if defined(__CUDACC__) && !defined(USE_ROCM)
+  inline C10_HOST_DEVICE BFloat16(const __nv_bfloat16& value);
+  explicit inline C10_HOST_DEVICE operator __nv_bfloat16() const;
+#endif
+
+#if defined(SYCL_EXT_ONEAPI_BFLOAT16_MATH_FUNCTIONS)
+  inline C10_HOST_DEVICE BFloat16(const sycl::ext::oneapi::bfloat16& value);
+  explicit inline C10_HOST_DEVICE operator sycl::ext::oneapi::bfloat16() const;
+#endif
+};
+
+C10_API inline std::ostream& operator<<(
+    std::ostream& out,
+    const BFloat16& value) {
+  out << (float)value;
+  return out;
+}
+
+} // namespace c10
+
+#include <c10/util/BFloat16-inl.h> // IWYU pragma: keep
diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Backtrace.h b/phivenv/Lib/site-packages/torch/include/c10/util/Backtrace.h
new file mode 100644
index 0000000000000000000000000000000000000000..d314570b74f94b9aa29d9c9ddfcc70f525f76653
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/c10/util/Backtrace.h
@@ -0,0 +1,31 @@
+#ifndef C10_UTIL_BACKTRACE_H_
+#define C10_UTIL_BACKTRACE_H_
+
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <typeinfo>
+
+#include <c10/macros/Macros.h>
+#include <c10/util/Lazy.h>
+
+namespace c10 {
+
+// Symbolizing the backtrace can be expensive; pass it around as a lazy string
+// so it is symbolized only if actually needed.
+using Backtrace = std::shared_ptr<const LazyValue<std::string>>;
+
+// DEPRECATED: Prefer get_lazy_backtrace().
+C10_API std::string get_backtrace(
+    size_t frames_to_skip = 0,
+    size_t maximum_number_of_frames = 64,
+    bool skip_python_frames = true);
+
+C10_API Backtrace get_lazy_backtrace(
+    size_t frames_to_skip = 0,
+    size_t maximum_number_of_frames = 64,
+    bool skip_python_frames = true);
+
+} // namespace c10
+
+#endif // C10_UTIL_BACKTRACE_H_
diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Bitset.h b/phivenv/Lib/site-packages/torch/include/c10/util/Bitset.h
new file mode 100644
index 0000000000000000000000000000000000000000..60fc589ad0480efe397a033add7c757b409f94da
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/c10/util/Bitset.h
@@ -0,0 +1,118 @@
+#pragma once
+
+#include <cstddef>
+#if defined(_MSC_VER)
+#include <intrin.h>
+#endif
+
+namespace c10::utils {
+
+/**
+ * This is a simple bitset class with sizeof(long long int) bits.
+ * You can set bits, unset bits, query bits by index,
+ * and query for the first set bit.
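+ *
+ * Usage sketch (illustrative):
+ *
+ *   c10::utils::bitset b;
+ *   b.set(3);
+ *   b.set(5);
+ *   b.for_each_set_bit([](size_t i) { ... }); // visits i == 3, then i == 5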
+ * Before using this class, please also take a look at std::bitset,
+ * which has more functionality and is more generic. It is probably
+ * a better fit for your use case. The sole reason for c10::utils::bitset
+ * to exist is that std::bitset misses a find_first_set() method.
+ */
+struct bitset final {
+ private:
+#if defined(_MSC_VER)
+  // MSVC's _BitScanForward64 expects int64_t
+  using bitset_type = int64_t;
+#else
+  // POSIX ffsll expects long long int
+  using bitset_type = long long int;
+#endif
+ public:
+  static constexpr size_t NUM_BITS() {
+    return 8 * sizeof(bitset_type);
+  }
+
+  constexpr bitset() noexcept = default;
+  constexpr bitset(const bitset&) noexcept = default;
+  constexpr bitset(bitset&&) noexcept = default;
+  // There is an issue with gcc 5.3.0 when defining a defaulted function as
+  // constexpr; see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68754.
+  bitset& operator=(const bitset&) noexcept = default;
+  bitset& operator=(bitset&&) noexcept = default;
+  ~bitset() = default;
+
+  constexpr void set(size_t index) noexcept {
+    bitset_ |= (static_cast<bitset_type>(1) << index);
+  }
+
+  constexpr void unset(size_t index) noexcept {
+    bitset_ &= ~(static_cast<bitset_type>(1) << index);
+  }
+
+  constexpr bool get(size_t index) const noexcept {
+    return bitset_ & (static_cast<bitset_type>(1) << index);
+  }
+
+  constexpr bool is_entirely_unset() const noexcept {
+    return 0 == bitset_;
+  }
+
+  // Call the given functor with the index of each bit that is set
+  template <class Func>
+  // NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward)
+  void for_each_set_bit(Func&& func) const {
+    bitset cur = *this;
+    size_t index = cur.find_first_set();
+    while (0 != index) {
+      // Subtract 1 because find_first_set() returns a one-indexed position.
+      index -= 1;
+      func(index);
+      cur.unset(index);
+      index = cur.find_first_set();
+    }
+  }
+
+ private:
+  // Return the index of the first set bit. The returned index is one-indexed
+  // (i.e. if the very first bit is set, this function returns '1'), and a
+  // return of '0' means that there was no bit set.
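+  // For example, if only bit 0 is set, find_first_set() returns 1, and for
+  // an all-zero bitset it returns 0. This matches the POSIX ffsll()
+  // convention used in the non-MSVC branch below.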
+ size_t find_first_set() const { +#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64)) + unsigned long result; + bool has_bits_set = (0 != _BitScanForward64(&result, bitset_)); + if (!has_bits_set) { + return 0; + } + return result + 1; +#elif defined(_MSC_VER) && defined(_M_IX86) + unsigned long result; + if (static_cast(bitset_) != 0) { + bool has_bits_set = + (0 != _BitScanForward(&result, static_cast(bitset_))); + if (!has_bits_set) { + return 0; + } + return result + 1; + } else { + bool has_bits_set = + (0 != _BitScanForward(&result, static_cast(bitset_ >> 32))); + if (!has_bits_set) { + return 32; + } + return result + 33; + } +#else + return __builtin_ffsll(bitset_); +#endif + } + + friend bool operator==(bitset lhs, bitset rhs) noexcept { + return lhs.bitset_ == rhs.bitset_; + } + + bitset_type bitset_{0}; +}; + +inline bool operator!=(bitset lhs, bitset rhs) noexcept { + return !(lhs == rhs); +} + +} // namespace c10::utils diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/C++17.h b/phivenv/Lib/site-packages/torch/include/c10/util/C++17.h new file mode 100644 index 0000000000000000000000000000000000000000..b4c82fc79d8157a9386382f94b6f36a1ec950759 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/C++17.h @@ -0,0 +1,86 @@ +#pragma once +#ifndef C10_UTIL_CPP17_H_ +#define C10_UTIL_CPP17_H_ + +#include +#include +#include +#include +#include + +#if !defined(__clang__) && !defined(_MSC_VER) && defined(__GNUC__) && \ + __GNUC__ < 9 +#error \ + "You're trying to build PyTorch with a too old version of GCC. We need GCC 9 or later." +#endif + +#if defined(__clang__) && __clang_major__ < 9 +#error \ + "You're trying to build PyTorch with a too old version of Clang. We need Clang 9 or later." +#endif + +#if (defined(_MSC_VER) && (!defined(_MSVC_LANG) || _MSVC_LANG < 201703L)) || \ + (!defined(_MSC_VER) && __cplusplus < 201703L) +#error You need C++17 to compile PyTorch +#endif + +#if defined(_WIN32) && (defined(min) || defined(max)) +#error Macro clash with min and max -- define NOMINMAX when compiling your program on Windows +#endif + +/* + * This header adds some polyfills with C++17 functionality + */ + +namespace c10 { + +// std::is_pod is deprecated in C++20, std::is_standard_layout and +// std::is_trivial are introduced in C++11, std::conjunction has been introduced +// in C++17. +template +using is_pod = std::conjunction, std::is_trivial>; + +template +constexpr bool is_pod_v = is_pod::value; + +namespace guts { + +#if defined(__cpp_lib_apply) && !defined(__CUDA_ARCH__) && !defined(__HIP__) + +template +C10_HOST_DEVICE inline constexpr decltype(auto) apply(F&& f, Tuple&& t) { + return std::apply(std::forward(f), std::forward(t)); +} + +#else + +// Implementation from http://en.cppreference.com/w/cpp/utility/apply (but +// modified) +// TODO This is an incomplete implementation of std::apply, not working for +// member functions. 
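+//
+// Usage sketch (illustrative):
+//
+//   auto add = [](int a, int b) { return a + b; };
+//   int three = c10::guts::apply(add, std::make_tuple(1, 2));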
+namespace detail {
+template <class F, class Tuple, std::size_t... INDEX>
+C10_HOST_DEVICE constexpr decltype(auto) apply_impl(
+    F&& f,
+    Tuple&& t,
+    std::index_sequence<INDEX...>) {
+  return std::forward<F>(f)(std::get<INDEX>(std::forward<Tuple>(t))...);
+}
+} // namespace detail
+
+template <class F, class Tuple>
+C10_HOST_DEVICE constexpr decltype(auto) apply(F&& f, Tuple&& t) {
+  return detail::apply_impl(
+      std::forward<F>(f),
+      std::forward<Tuple>(t),
+      std::make_index_sequence<
+          std::tuple_size<std::remove_reference_t<Tuple>>::value>{});
+}
+
+#endif
+
+} // namespace guts
+
+} // namespace c10
+
+#endif // C10_UTIL_CPP17_H_
diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/CallOnce.h b/phivenv/Lib/site-packages/torch/include/c10/util/CallOnce.h
new file mode 100644
index 0000000000000000000000000000000000000000..e466fe22bd28d32f0ebf153a8cc5c5090a5e3948
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/c10/util/CallOnce.h
@@ -0,0 +1,70 @@
+#pragma once
+
+#include <c10/macros/Macros.h>
+#include <c10/util/C++17.h>
+
+#include <atomic>
+#include <functional>
+#include <mutex>
+#include <utility>
+
+namespace c10 {
+
+// Custom c10 call_once implementation to avoid the deadlock in std::call_once.
+// The implementation here is a simplified version of folly's, and it likely
+// has a much higher memory footprint.
+template <typename Flag, typename F, typename... Args>
+inline void call_once(Flag& flag, F&& f, Args&&... args) {
+  if (C10_LIKELY(flag.test_once())) {
+    return;
+  }
+  flag.call_once_slow(std::forward<F>(f), std::forward<Args>(args)...);
+}
+
+class once_flag {
+ public:
+#ifndef _WIN32
+  // running into build error on MSVC. Can't seem to get a repro locally so I'm
+  // just avoiding constexpr
+  //
+  // C:/actions-runner/_work/pytorch/pytorch\c10/util/CallOnce.h(26): error:
+  // defaulted default constructor cannot be constexpr because the
+  // corresponding implicitly declared default constructor would not be
+  // constexpr 1 error detected in the compilation of
+  // "C:/actions-runner/_work/pytorch/pytorch/aten/src/ATen/cuda/cub.cu".
+  constexpr
+#endif
+      once_flag() noexcept = default;
+  once_flag(const once_flag&) = delete;
+  once_flag& operator=(const once_flag&) = delete;
+  once_flag(once_flag&&) = delete;
+  once_flag& operator=(once_flag&&) = delete;
+  ~once_flag() = default;
+  bool test_once() {
+    return init_.load(std::memory_order_acquire);
+  }
+
+ private:
+  template <typename Flag, typename F, typename... Args>
+  friend void call_once(Flag& flag, F&& f, Args&&... args);
+
+  template <typename F, typename... Args>
+  void call_once_slow(F&& f, Args&&...
args) { + std::lock_guard guard(mutex_); + if (init_.load(std::memory_order_relaxed)) { + return; + } + std::invoke(std::forward(f), std::forward(args)...); + init_.store(true, std::memory_order_release); + } + + void reset_once() { + init_.store(false, std::memory_order_release); + } + + private: + std::mutex mutex_; + std::atomic init_{false}; +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/ConstexprCrc.h b/phivenv/Lib/site-packages/torch/include/c10/util/ConstexprCrc.h new file mode 100644 index 0000000000000000000000000000000000000000..86d58620bdbb865d8d32fbd4c10ef6f3d877c833 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/ConstexprCrc.h @@ -0,0 +1,132 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace c10::util { + +namespace detail { +// NOLINTNEXTLINE(*c-arrays*) +constexpr uint64_t crc64_table[] = { + 0x0000000000000000, 0x7ad870c830358979, 0xf5b0e190606b12f2, + 0x8f689158505e9b8b, 0xc038e5739841b68f, 0xbae095bba8743ff6, + 0x358804e3f82aa47d, 0x4f50742bc81f2d04, 0xab28ecb46814fe75, + 0xd1f09c7c5821770c, 0x5e980d24087fec87, 0x24407dec384a65fe, + 0x6b1009c7f05548fa, 0x11c8790fc060c183, 0x9ea0e857903e5a08, + 0xe478989fa00bd371, 0x7d08ff3b88be6f81, 0x07d08ff3b88be6f8, + 0x88b81eabe8d57d73, 0xf2606e63d8e0f40a, 0xbd301a4810ffd90e, + 0xc7e86a8020ca5077, 0x4880fbd87094cbfc, 0x32588b1040a14285, + 0xd620138fe0aa91f4, 0xacf86347d09f188d, 0x2390f21f80c18306, + 0x594882d7b0f40a7f, 0x1618f6fc78eb277b, 0x6cc0863448deae02, + 0xe3a8176c18803589, 0x997067a428b5bcf0, 0xfa11fe77117cdf02, + 0x80c98ebf2149567b, 0x0fa11fe77117cdf0, 0x75796f2f41224489, + 0x3a291b04893d698d, 0x40f16bccb908e0f4, 0xcf99fa94e9567b7f, + 0xb5418a5cd963f206, 0x513912c379682177, 0x2be1620b495da80e, + 0xa489f35319033385, 0xde51839b2936bafc, 0x9101f7b0e12997f8, + 0xebd98778d11c1e81, 0x64b116208142850a, 0x1e6966e8b1770c73, + 0x8719014c99c2b083, 0xfdc17184a9f739fa, 0x72a9e0dcf9a9a271, + 0x08719014c99c2b08, 0x4721e43f0183060c, 0x3df994f731b68f75, + 0xb29105af61e814fe, 0xc849756751dd9d87, 0x2c31edf8f1d64ef6, + 0x56e99d30c1e3c78f, 0xd9810c6891bd5c04, 0xa3597ca0a188d57d, + 0xec09088b6997f879, 0x96d1784359a27100, 0x19b9e91b09fcea8b, + 0x636199d339c963f2, 0xdf7adabd7a6e2d6f, 0xa5a2aa754a5ba416, + 0x2aca3b2d1a053f9d, 0x50124be52a30b6e4, 0x1f423fcee22f9be0, + 0x659a4f06d21a1299, 0xeaf2de5e82448912, 0x902aae96b271006b, + 0x74523609127ad31a, 0x0e8a46c1224f5a63, 0x81e2d7997211c1e8, + 0xfb3aa75142244891, 0xb46ad37a8a3b6595, 0xceb2a3b2ba0eecec, + 0x41da32eaea507767, 0x3b024222da65fe1e, 0xa2722586f2d042ee, + 0xd8aa554ec2e5cb97, 0x57c2c41692bb501c, 0x2d1ab4dea28ed965, + 0x624ac0f56a91f461, 0x1892b03d5aa47d18, 0x97fa21650afae693, + 0xed2251ad3acf6fea, 0x095ac9329ac4bc9b, 0x7382b9faaaf135e2, + 0xfcea28a2faafae69, 0x8632586aca9a2710, 0xc9622c4102850a14, + 0xb3ba5c8932b0836d, 0x3cd2cdd162ee18e6, 0x460abd1952db919f, + 0x256b24ca6b12f26d, 0x5fb354025b277b14, 0xd0dbc55a0b79e09f, + 0xaa03b5923b4c69e6, 0xe553c1b9f35344e2, 0x9f8bb171c366cd9b, + 0x10e3202993385610, 0x6a3b50e1a30ddf69, 0x8e43c87e03060c18, + 0xf49bb8b633338561, 0x7bf329ee636d1eea, 0x012b592653589793, + 0x4e7b2d0d9b47ba97, 0x34a35dc5ab7233ee, 0xbbcbcc9dfb2ca865, + 0xc113bc55cb19211c, 0x5863dbf1e3ac9dec, 0x22bbab39d3991495, + 0xadd33a6183c78f1e, 0xd70b4aa9b3f20667, 0x985b3e827bed2b63, + 0xe2834e4a4bd8a21a, 0x6debdf121b863991, 0x1733afda2bb3b0e8, + 0xf34b37458bb86399, 0x8993478dbb8deae0, 0x06fbd6d5ebd3716b, + 0x7c23a61ddbe6f812, 0x3373d23613f9d516, 0x49aba2fe23cc5c6f, + 0xc6c333a67392c7e4, 
0xbc1b436e43a74e9d, 0x95ac9329ac4bc9b5, + 0xef74e3e19c7e40cc, 0x601c72b9cc20db47, 0x1ac40271fc15523e, + 0x5594765a340a7f3a, 0x2f4c0692043ff643, 0xa02497ca54616dc8, + 0xdafce7026454e4b1, 0x3e847f9dc45f37c0, 0x445c0f55f46abeb9, + 0xcb349e0da4342532, 0xb1eceec59401ac4b, 0xfebc9aee5c1e814f, + 0x8464ea266c2b0836, 0x0b0c7b7e3c7593bd, 0x71d40bb60c401ac4, + 0xe8a46c1224f5a634, 0x927c1cda14c02f4d, 0x1d148d82449eb4c6, + 0x67ccfd4a74ab3dbf, 0x289c8961bcb410bb, 0x5244f9a98c8199c2, + 0xdd2c68f1dcdf0249, 0xa7f41839ecea8b30, 0x438c80a64ce15841, + 0x3954f06e7cd4d138, 0xb63c61362c8a4ab3, 0xcce411fe1cbfc3ca, + 0x83b465d5d4a0eece, 0xf96c151de49567b7, 0x76048445b4cbfc3c, + 0x0cdcf48d84fe7545, 0x6fbd6d5ebd3716b7, 0x15651d968d029fce, + 0x9a0d8ccedd5c0445, 0xe0d5fc06ed698d3c, 0xaf85882d2576a038, + 0xd55df8e515432941, 0x5a3569bd451db2ca, 0x20ed197575283bb3, + 0xc49581ead523e8c2, 0xbe4df122e51661bb, 0x3125607ab548fa30, + 0x4bfd10b2857d7349, 0x04ad64994d625e4d, 0x7e7514517d57d734, + 0xf11d85092d094cbf, 0x8bc5f5c11d3cc5c6, 0x12b5926535897936, + 0x686de2ad05bcf04f, 0xe70573f555e26bc4, 0x9ddd033d65d7e2bd, + 0xd28d7716adc8cfb9, 0xa85507de9dfd46c0, 0x273d9686cda3dd4b, + 0x5de5e64efd965432, 0xb99d7ed15d9d8743, 0xc3450e196da80e3a, + 0x4c2d9f413df695b1, 0x36f5ef890dc31cc8, 0x79a59ba2c5dc31cc, + 0x037deb6af5e9b8b5, 0x8c157a32a5b7233e, 0xf6cd0afa9582aa47, + 0x4ad64994d625e4da, 0x300e395ce6106da3, 0xbf66a804b64ef628, + 0xc5bed8cc867b7f51, 0x8aeeace74e645255, 0xf036dc2f7e51db2c, + 0x7f5e4d772e0f40a7, 0x05863dbf1e3ac9de, 0xe1fea520be311aaf, + 0x9b26d5e88e0493d6, 0x144e44b0de5a085d, 0x6e963478ee6f8124, + 0x21c640532670ac20, 0x5b1e309b16452559, 0xd476a1c3461bbed2, + 0xaeaed10b762e37ab, 0x37deb6af5e9b8b5b, 0x4d06c6676eae0222, + 0xc26e573f3ef099a9, 0xb8b627f70ec510d0, 0xf7e653dcc6da3dd4, + 0x8d3e2314f6efb4ad, 0x0256b24ca6b12f26, 0x788ec2849684a65f, + 0x9cf65a1b368f752e, 0xe62e2ad306bafc57, 0x6946bb8b56e467dc, + 0x139ecb4366d1eea5, 0x5ccebf68aecec3a1, 0x2616cfa09efb4ad8, + 0xa97e5ef8cea5d153, 0xd3a62e30fe90582a, 0xb0c7b7e3c7593bd8, + 0xca1fc72bf76cb2a1, 0x45775673a732292a, 0x3faf26bb9707a053, + 0x70ff52905f188d57, 0x0a2722586f2d042e, 0x854fb3003f739fa5, + 0xff97c3c80f4616dc, 0x1bef5b57af4dc5ad, 0x61372b9f9f784cd4, + 0xee5fbac7cf26d75f, 0x9487ca0fff135e26, 0xdbd7be24370c7322, + 0xa10fceec0739fa5b, 0x2e675fb4576761d0, 0x54bf2f7c6752e8a9, + 0xcdcf48d84fe75459, 0xb71738107fd2dd20, 0x387fa9482f8c46ab, + 0x42a7d9801fb9cfd2, 0x0df7adabd7a6e2d6, 0x772fdd63e7936baf, + 0xf8474c3bb7cdf024, 0x829f3cf387f8795d, 0x66e7a46c27f3aa2c, + 0x1c3fd4a417c62355, 0x935745fc4798b8de, 0xe98f353477ad31a7, + 0xa6df411fbfb21ca3, 0xdc0731d78f8795da, 0x536fa08fdfd90e51, + 0x29b7d047efec8728, +}; + +inline constexpr uint64_t crc64impl( + uint64_t accumulator, + const char* data, + size_t size) { + for (size_t i = 0; i < size; ++i) { + accumulator = + crc64_table[(accumulator ^ data[i]) & 0xFF] ^ (accumulator >> 8); + } + return accumulator; +} +} // namespace detail + +struct crc64_t final : IdWrapper { + constexpr crc64_t(uint64_t checksum) : IdWrapper(checksum) {} + constexpr uint64_t checksum() const { + return this->underlyingId(); + } +}; + +// CRC64 with Jones coefficients and an init value of 0. 
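+// Because crc64impl() is constexpr, checksums can be evaluated at compile
+// time (illustrative sketch; assumes crc64_t's constexpr equality inherited
+// from IdWrapper):
+//
+//   constexpr auto h = c10::util::crc64("ArrayRef", 8);
+//   static_assert(h == c10::util::crc64(std::string_view("ArrayRef")), "");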
+inline constexpr crc64_t crc64(const char* str, size_t size) { + return crc64_t{detail::crc64impl(0, str, size)}; +} + +inline constexpr crc64_t crc64(std::string_view str) { + return crc64(str.data(), str.size()); +} +} // namespace c10::util + +// Allow usage of crc64_t in std::unordered_set +C10_DEFINE_HASH_FOR_IDWRAPPER(c10::util::crc64_t) diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/DeadlockDetection.h b/phivenv/Lib/site-packages/torch/include/c10/util/DeadlockDetection.h new file mode 100644 index 0000000000000000000000000000000000000000..b46d294588177a4c56a12f3fac82321e1682cb67 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/DeadlockDetection.h @@ -0,0 +1,52 @@ +#pragma once + +#include +#include + +/// This file provides some simple utilities for detecting common deadlocks in +/// PyTorch. For now, we focus exclusively on detecting Python GIL deadlocks, +/// as the GIL is a wide ranging lock that is taken out in many situations. +/// The basic strategy is before performing an operation that may block, you +/// can use TORCH_ASSERT_NO_GIL_WITHOUT_PYTHON_DEP() to assert that the GIL is +/// not held. This macro is to be used in contexts where no static dependency +/// on Python is available (we will handle indirecting a virtual call for you). +/// +/// If the GIL is held by a torchdeploy interpreter, we always report false. +/// If you are in a context where Python bindings are available, it's better +/// to directly assert on PyGILState_Check (as it avoids a vcall and also +/// works correctly with torchdeploy.) + +#define TORCH_ASSERT_NO_GIL_WITHOUT_PYTHON_DEP() \ + TORCH_INTERNAL_ASSERT( \ + !c10::impl::check_python_gil(), \ + "Holding GIL before a blocking operation! Please release the GIL before blocking, or see https://github.com/pytorch/pytorch/issues/56297 for how to release the GIL for destructors of objects") + +namespace c10::impl { + +C10_API bool check_python_gil(); + +struct C10_API PythonGILHooks { + virtual ~PythonGILHooks() = default; + // Returns true if we hold the GIL. If not linked against Python we + // always return false. + virtual bool check_python_gil() const = 0; +}; + +C10_API void SetPythonGILHooks(PythonGILHooks* factory); + +// DO NOT call this registerer from a torch deploy instance! You will clobber +// other registrations +struct C10_API PythonGILHooksRegisterer { + explicit PythonGILHooksRegisterer(PythonGILHooks* factory) { + SetPythonGILHooks(factory); + } + PythonGILHooksRegisterer(const PythonGILHooksRegisterer&) = delete; + PythonGILHooksRegisterer(PythonGILHooksRegisterer&&) = delete; + PythonGILHooksRegisterer& operator=(const PythonGILHooksRegisterer&) = delete; + PythonGILHooksRegisterer& operator=(PythonGILHooksRegisterer&&) = delete; + ~PythonGILHooksRegisterer() { + SetPythonGILHooks(nullptr); + } +}; + +} // namespace c10::impl diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Deprecated.h b/phivenv/Lib/site-packages/torch/include/c10/util/Deprecated.h new file mode 100644 index 0000000000000000000000000000000000000000..6242b93ea400702ed47c7ddcaa6f25b325359bd3 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Deprecated.h @@ -0,0 +1,102 @@ +#pragma once + +/** + * This file provides portable macros for marking declarations + * as deprecated. You should generally use C10_DEPRECATED, + * except when marking 'using' declarations as deprecated, + * in which case you should use C10_DEFINE_DEPRECATED_USING + * (due to portability concerns). 
+ */ + +// Sample usage: +// +// C10_DEPRECATED void bad_func(); +// struct C10_DEPRECATED BadStruct { +// ... +// }; + +// NB: __cplusplus doesn't work for MSVC, so for now MSVC always uses +// the "__declspec(deprecated)" implementation and not the C++14 +// "[[deprecated]]" attribute. We tried enabling "[[deprecated]]" for C++14 on +// MSVC, but ran into issues with some older MSVC versions. +#if (defined(__cplusplus) && __cplusplus >= 201402L) +#define C10_DEPRECATED [[deprecated]] +#define C10_DEPRECATED_MESSAGE(message) [[deprecated(message)]] +#elif defined(__GNUC__) +#define C10_DEPRECATED __attribute__((deprecated)) +// TODO Is there some way to implement this? +#define C10_DEPRECATED_MESSAGE(message) __attribute__((deprecated)) + +#elif defined(_MSC_VER) +#define C10_DEPRECATED __declspec(deprecated) +#define C10_DEPRECATED_MESSAGE(message) __declspec(deprecated(message)) +#else +#warning "You need to implement C10_DEPRECATED for this compiler" +#define C10_DEPRECATED +#endif + +// Sample usage: +// +// C10_DEFINE_DEPRECATED_USING(BadType, int) +// +// which is the portable version of +// +// using BadType [[deprecated]] = int; + +// technically [[deprecated]] syntax is from c++14 standard, but it works in +// many compilers. +#if defined(__has_cpp_attribute) +#if __has_cpp_attribute(deprecated) && !defined(__CUDACC__) +#define C10_DEFINE_DEPRECATED_USING(TypeName, TypeThingy) \ + using TypeName [[deprecated]] = TypeThingy; +#endif +#endif + +#if defined(_MSC_VER) +#if defined(__CUDACC__) +// neither [[deprecated]] nor __declspec(deprecated) work on nvcc on Windows; +// you get the error: +// +// error: attribute does not apply to any entity +// +// So we just turn the macro off in this case. +#if defined(C10_DEFINE_DEPRECATED_USING) +#undef C10_DEFINE_DEPRECATED_USING +#endif +#define C10_DEFINE_DEPRECATED_USING(TypeName, TypeThingy) \ + using TypeName = TypeThingy; +#else +// [[deprecated]] does work in windows without nvcc, though msc doesn't support +// `__has_cpp_attribute` when c++14 is supported, otherwise +// __declspec(deprecated) is used as the alternative. +#ifndef C10_DEFINE_DEPRECATED_USING +#if defined(_MSVC_LANG) && _MSVC_LANG >= 201402L +#define C10_DEFINE_DEPRECATED_USING(TypeName, TypeThingy) \ + using TypeName [[deprecated]] = TypeThingy; +#else +#define C10_DEFINE_DEPRECATED_USING(TypeName, TypeThingy) \ + using TypeName = __declspec(deprecated) TypeThingy; +#endif +#endif +#endif +#endif + +#if !defined(C10_DEFINE_DEPRECATED_USING) && defined(__GNUC__) +// nvcc has a bug where it doesn't understand __attribute__((deprecated)) +// declarations even when the host compiler supports it. We'll only use this gcc +// attribute when not cuda, and when using a GCC compiler that doesn't support +// the c++14 syntax we checked for above (available in __GNUC__ >= 5) +#if !defined(__CUDACC__) +#define C10_DEFINE_DEPRECATED_USING(TypeName, TypeThingy) \ + using TypeName __attribute__((deprecated)) = TypeThingy; +#else +// using cuda + gcc < 5, neither deprecated syntax is available so turning off. 
+#define C10_DEFINE_DEPRECATED_USING(TypeName, TypeThingy) \ + using TypeName = TypeThingy; +#endif +#endif + +#if !defined(C10_DEFINE_DEPRECATED_USING) +#warning "You need to implement C10_DEFINE_DEPRECATED_USING for this compiler" +#define C10_DEFINE_DEPRECATED_USING +#endif diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/DimVector.h b/phivenv/Lib/site-packages/torch/include/c10/util/DimVector.h new file mode 100644 index 0000000000000000000000000000000000000000..0ae8169e86682bf2d084cba8e8eaa9a186faa9c3 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/DimVector.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace c10 { + +constexpr size_t kDimVectorStaticSize = C10_SIZES_AND_STRIDES_MAX_INLINE_SIZE; + +/// A container for sizes or strides +using DimVector = SmallVector; +using SymDimVector = SmallVector; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/DynamicCounter.h b/phivenv/Lib/site-packages/torch/include/c10/util/DynamicCounter.h new file mode 100644 index 0000000000000000000000000000000000000000..b6caa644f19c354b6e70550ce91bcc4fe970c65d --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/DynamicCounter.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include + +#include + +namespace c10::monitor { + +class C10_API DynamicCounter { + public: + using Callback = std::function; + + // Creates a dynamic counter that can be queried at any point in time by + // multiple backends. Only one counter with a given key can exist at any point + // in time. + // + // The callback is invoked every time the counter is queried. + // The callback must be thread-safe. + // The callback must not throw. + // The callback must not block. + DynamicCounter(std::string_view key, Callback getCounterCallback); + + // Unregisters the callback. + // Waits for all ongoing callback invocations to finish. + ~DynamicCounter(); + + private: + struct Guard; + std::unique_ptr guard_; +}; + +namespace detail { +class DynamicCounterBackendIf { + public: + virtual ~DynamicCounterBackendIf() = default; + + virtual void registerCounter( + std::string_view key, + DynamicCounter::Callback getCounterCallback) = 0; + // MUST wait for all ongoing callback invocations to finish + virtual void unregisterCounter(std::string_view key) = 0; +}; + +void C10_API + registerDynamicCounterBackend(std::unique_ptr); +} // namespace detail +} // namespace c10::monitor diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Enumerate.h b/phivenv/Lib/site-packages/torch/include/c10/util/Enumerate.h new file mode 100644 index 0000000000000000000000000000000000000000..f51ed4ce7e304c6343df0693051ce68c2f06d93b --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Enumerate.h @@ -0,0 +1,159 @@ +/* + * Ported from folly/container/Enumerate.h + */ + +#pragma once + +#include +#include + +#ifdef _WIN32 +#include // @manual +using ssize_t = SSIZE_T; +#endif + +#include + +/** + * Similar to Python's enumerate(), enumerate() can be used to + * iterate a range with a for-range loop, and it also allows to + * retrieve the count of iterations so far. Can be used in constexpr + * context. + * + * For example: + * + * for (auto&& [index, element] : enumerate(vec)) { + * // index is a const reference to a size_t containing the iteration count. + * // element is a reference to the type contained within vec, mutable + * // unless vec is const. 
+ * } + * + * If the binding is const, the element reference is too. + * + * for (const auto&& [index, element] : enumerate(vec)) { + * // element is always a const reference. + * } + * + * It can also be used as follows: + * + * for (auto&& it : enumerate(vec)) { + * // *it is a reference to the current element. Mutable unless vec is const. + * // it->member can be used as well. + * // it.index contains the iteration count. + * } + * + * As before, const auto&& it can also be used. + */ + +namespace c10 { + +namespace detail { + +template +struct MakeConst { + using type = const T; +}; +template +struct MakeConst { + using type = const T&; +}; +template +struct MakeConst { + using type = const T*; +}; + +template +class Enumerator { + public: + constexpr explicit Enumerator(Iterator it) : it_(std::move(it)) {} + + class Proxy { + public: + using difference_type = ssize_t; + using value_type = typename std::iterator_traits::value_type; + using reference = typename std::iterator_traits::reference; + using pointer = typename std::iterator_traits::pointer; + using iterator_category = std::input_iterator_tag; + + C10_ALWAYS_INLINE constexpr explicit Proxy(const Enumerator& e) + : index(e.idx_), element(*e.it_) {} + + // Non-const Proxy: Forward constness from Iterator. + C10_ALWAYS_INLINE constexpr reference operator*() { + return element; + } + C10_ALWAYS_INLINE constexpr pointer operator->() { + return std::addressof(element); + } + + // Const Proxy: Force const references. + C10_ALWAYS_INLINE constexpr typename MakeConst::type operator*() + const { + return element; + } + C10_ALWAYS_INLINE constexpr typename MakeConst::type operator->() + const { + return std::addressof(element); + } + + public: + size_t index; + reference element; + }; + + C10_ALWAYS_INLINE constexpr Proxy operator*() const { + return Proxy(*this); + } + + C10_ALWAYS_INLINE constexpr Enumerator& operator++() { + ++it_; + ++idx_; + return *this; + } + + template + C10_ALWAYS_INLINE constexpr bool operator==( + const Enumerator& rhs) const { + return it_ == rhs.it_; + } + + template + C10_ALWAYS_INLINE constexpr bool operator!=( + const Enumerator& rhs) const { + return !(it_ == rhs.it_); + } + + private: + template + friend class Enumerator; + + Iterator it_; + size_t idx_ = 0; +}; + +template +class RangeEnumerator { + Range r_; + using BeginIteratorType = decltype(std::declval().begin()); + using EndIteratorType = decltype(std::declval().end()); + + public: + // NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved) + constexpr explicit RangeEnumerator(Range&& r) : r_(std::forward(r)) {} + + constexpr Enumerator begin() { + return Enumerator(r_.begin()); + } + constexpr Enumerator end() { + return Enumerator(r_.end()); + } +}; + +} // namespace detail + +template +constexpr detail::RangeEnumerator enumerate(Range&& r) { + return detail::RangeEnumerator(std::forward(r)); +} + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Exception.h b/phivenv/Lib/site-packages/torch/include/c10/util/Exception.h new file mode 100644 index 0000000000000000000000000000000000000000..e651112e03da05b712fc2b1448aa873ec8f37c1e --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Exception.h @@ -0,0 +1,782 @@ +#ifndef C10_UTIL_EXCEPTION_H_ +#define C10_UTIL_EXCEPTION_H_ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#if defined(_MSC_VER) && _MSC_VER <= 1900 +#define __func__ __FUNCTION__ +#endif + +namespace c10 { + 
+/// The primary ATen error class. +/// Provides a complete error message with source location information via +/// `what()`, and a more concise message via `what_without_backtrace()`. +/// Don't throw this directly; use TORCH_CHECK/TORCH_INTERNAL_ASSERT instead. +/// +/// NB: c10::Error is handled specially by the default torch to suppress the +/// backtrace, see torch/csrc/Exceptions.h +class C10_API Error : public std::exception { + private: + // The actual error message. + std::string msg_; + + // Context for the message (in order of decreasing specificity). Context will + // be automatically formatted appropriately, so it is not necessary to add + // extra leading/trailing newlines to strings inside this vector + std::vector context_; + + // The C++ backtrace at the point when this exception was raised. This + // may be empty if there is no valid backtrace. (We don't use optional + // here to reduce the dependencies this file has.) + Backtrace backtrace_; + + // These two are derived fields from msg_stack_ and backtrace_, but we need + // fields for the strings so that we can return a const char* (as the + // signature of std::exception requires). Currently, the invariant + // is that these fields are ALWAYS populated consistently with respect + // to msg_stack_ and backtrace_. + mutable OptimisticLazy what_; + std::string what_without_backtrace_; + + // This is a little debugging trick: you can stash a relevant pointer + // in caller, and then when you catch the exception, you can compare + // against pointers you have on hand to get more information about + // where the exception came from. In Caffe2, this is used to figure + // out which operator raised an exception. + const void* caller_; + + public: + // PyTorch-style Error constructor. NB: the implementation of this + // is actually in Logging.cpp + Error(SourceLocation source_location, std::string msg); + + // Caffe2-style error message + Error( + const char* file, + const uint32_t line, + const char* condition, + const std::string& msg, + Backtrace backtrace, + const void* caller = nullptr); + + // Base constructor + Error( + std::string msg, + Backtrace backtrace = nullptr, + const void* caller = nullptr); + + // Add some new context to the message stack. The last added context + // will be formatted at the end of the context list upon printing. + // WARNING: This method is O(n) in the size of the stack, so don't go + // wild adding a ridiculous amount of context to error messages. + void add_context(std::string msg); + + const std::string& msg() const { + return msg_; + } + + const std::vector& context() const { + return context_; + } + + const Backtrace& backtrace() const; + + /// Returns the complete error message, including the source location. + /// The returned pointer is invalidated if you call add_context() on + /// this object. + const char* what() const noexcept override; + + const void* caller() const noexcept { + return caller_; + } + + /// Returns only the error message string, without source location. + /// The returned pointer is invalidated if you call add_context() on + /// this object. 
+ virtual const char* what_without_backtrace() const noexcept { + return what_without_backtrace_.c_str(); + } + + private: + void refresh_what(); + std::string compute_what(bool include_backtrace) const; +}; + +class C10_API Warning { + public: + class C10_API UserWarning{}; + class C10_API DeprecationWarning{}; + + using warning_variant_t = std::variant; + + Warning( + warning_variant_t type, + const SourceLocation& source_location, + std::string msg, + bool verbatim); + + Warning( + warning_variant_t type, + SourceLocation source_location, + const char* msg, + bool verbatim); + + Warning( + warning_variant_t type, + SourceLocation source_location, + ::c10::detail::CompileTimeEmptyString msg, + bool verbatim); + + // Getters for members + warning_variant_t type() const; + const SourceLocation& source_location() const; + const std::string& msg() const; + bool verbatim() const; + + private: + // The type of warning + warning_variant_t type_; + + // Where the warning happened. + SourceLocation source_location_; + + // The actual warning message. + std::string msg_; + + // See note: [Verbatim Warnings] + bool verbatim_; +}; + +using UserWarning = Warning::UserWarning; +using DeprecationWarning = Warning::DeprecationWarning; + +// Issue a warning with a given message. Dispatched to the current +// warning handler. +void C10_API warn(const Warning& warning); + +class C10_API WarningHandler { + public: + virtual ~WarningHandler() = default; + /// The default warning handler. Prints the message to stderr. + virtual void process(const Warning& warning); +}; + +namespace WarningUtils { + +// Note: [Verbatim Warnings] +// Warnings originating in C++ code can appear out-of-place to Python users: +// a user runs a line in Python, but the warning references a line in C++. +// Some parts of PyTorch, like the JIT, are cognizant of this mismatch +// and take care to map warnings back to the user's program, but most +// of PyTorch simply throws a context-free warning. To allow warning +// handlers to add context where appropriate, warn takes the +// "verbatim" flag. When this is false a warning handler might append +// the C++ warning to a Python warning message that relates the warning +// back to the user's program. Callers who have already accounted for +// context in their warnings should set verbatim to true so their warnings +// appear without modification. + +/// Sets the global warning handler. This is not thread-safe, so it should +/// generally be called once during initialization or while holding the GIL +/// for programs that use python. +/// User is responsible for keeping the WarningHandler alive until +/// it is not needed. +C10_API void set_warning_handler(WarningHandler* handler) noexcept(true); +/// Gets the global warning handler. +C10_API WarningHandler* get_warning_handler() noexcept(true); + +class C10_API WarningHandlerGuard { + WarningHandler* prev_handler_; + + public: + WarningHandlerGuard(WarningHandler* new_handler) + : prev_handler_(c10::WarningUtils::get_warning_handler()) { + c10::WarningUtils::set_warning_handler(new_handler); + } + WarningHandlerGuard(WarningHandlerGuard&& other) = delete; + WarningHandlerGuard(const WarningHandlerGuard&) = delete; + WarningHandlerGuard& operator=(const WarningHandlerGuard&) = delete; + WarningHandlerGuard& operator=(WarningHandlerGuard&&) = delete; + ~WarningHandlerGuard() { + c10::WarningUtils::set_warning_handler(prev_handler_); + } +}; + +/// The TORCH_WARN_ONCE macro is difficult to test for. 
Use
+/// setWarnAlways(true) to turn it into TORCH_WARN, which can be
+/// tested for more easily.
+C10_API void set_warnAlways(bool) noexcept(true);
+C10_API bool get_warnAlways() noexcept(true);
+
+// A RAII guard that sets warn_always (not thread-local) on
+// construction, and sets it back to the original value upon destruction.
+struct C10_API WarnAlways {
+ public:
+  explicit WarnAlways(bool setting = true);
+  ~WarnAlways();
+
+ private:
+  bool prev_setting;
+};
+
+} // namespace WarningUtils
+
+// Like Error, but we always report the C++ backtrace, instead of only
+// reporting when TORCH_SHOW_CPP_STACKTRACES
+class C10_API ErrorAlwaysShowCppStacktrace : public Error {
+  using Error::Error;
+  const char* what_without_backtrace() const noexcept override {
+    return what();
+  }
+};
+
+// Used in ATen for out-of-bound indices that can reasonably only be detected
+// lazily inside a kernel (See: advanced indexing). These turn into
+// IndexError when they cross to Python.
+class C10_API IndexError : public Error {
+  using Error::Error;
+};
+
+// Used in ATen for invalid values. These turn into
+// ValueError when they cross to Python.
+class C10_API ValueError : public Error {
+  using Error::Error;
+};
+
+// Used in ATen for invalid types. These turn into
+// TypeError when they cross to Python.
+class C10_API TypeError : public Error {
+  using Error::Error;
+};
+
+// Used in ATen for functionality that is not implemented. These turn into
+// NotImplementedError when they cross to Python.
+class C10_API NotImplementedError : public Error {
+  using Error::Error;
+};
+
+// Used in ATen for non-finite indices. These turn into
+// ExitException when they cross to Python.
+class C10_API EnforceFiniteError : public Error {
+  using Error::Error;
+};
+
+// Used in Onnxifi backend lowering. These turn into
+// ExitException when they cross to Python.
+class C10_API OnnxfiBackendSystemError : public Error {
+  using Error::Error;
+};
+
+// Used for numerical errors from the linalg module. These
+// turn into LinAlgError when they cross into Python.
+class C10_API LinAlgError : public Error {
+  using Error::Error;
+};
+
+class C10_API OutOfMemoryError : public Error {
+  using Error::Error;
+};
+
+// Used for handling syntactic errors in input arguments.
+// These turn into SyntaxError when they cross into Python.
+class C10_API SyntaxError : public Error {
+  using Error::Error;
+};
+
+// Raised when an accelerator API call hits an error.
+// These turn into AcceleratorError when they cross into Python.
+class C10_API AcceleratorError : public Error {
+  int32_t error_code;
+
+ public:
+  AcceleratorError(SourceLocation loc, int32_t code, const std::string& msg)
+      : Error(loc, msg), error_code(code) {}
+  int32_t get_error_code() const {
+    return error_code;
+  }
+};
+
+// Base error type for all distributed errors.
+// These turn into DistError when they cross into Python.
+class C10_API DistError : public Error {
+  using Error::Error;
+};
+
+// Used for collective communication library errors from the distributed
+// module. These turn into DistBackendError when they cross into Python.
+class C10_API DistBackendError : public DistError {
+  using DistError::DistError;
+};
+
+// Used for errors originating from the store.
+// These turn into DistStoreError when they cross into Python.
+class C10_API DistStoreError : public DistError {
+  using DistError::DistError;
+};
+
+// Used for errors originating from the TCP/IP stack and not from collective
+// libraries.
These turn into DistNetworkError when they cross into Python. +class C10_API DistNetworkError : public DistError { + using DistError::DistError; +}; + +// Raised when a queue is empty and a non-blocking pop is called. +// Translated to torch.distributed.QueueEmptyError in Python +class C10_API DistQueueEmptyError : public DistStoreError { + using DistStoreError::DistStoreError; +}; + +// A utility function to return an exception std::string by prepending its +// exception type before its what() content +C10_API std::string GetExceptionString(const std::exception& e); + +} // namespace c10 + +// Private helper macro for implementing TORCH_INTERNAL_ASSERT and TORCH_CHECK +// +// Note: In the debug build With MSVC, __LINE__ might be of long type (a.k.a +// int32_t), which is different from the definition of `SourceLocation` that +// requires unsigned int (a.k.a uint32_t) and may cause a compile error with the +// message: error C2397: conversion from 'long' to 'uint32_t' requires a +// narrowing conversion Here the static cast is used to pass the build. if this +// is used inside a lambda the __func__ macro expands to operator(), which isn't +// very useful, but hard to fix in a macro so suppressing the warning. +#define C10_THROW_ERROR(err_type, msg) \ + throw ::c10::err_type( \ + {__func__, __FILE__, static_cast(__LINE__)}, msg) + +#define C10_BUILD_ERROR(err_type, msg) \ + ::c10::err_type({__func__, __FILE__, static_cast(__LINE__)}, msg) + +// Private helper macro for workaround MSVC misexpansion of nested macro +// invocations involving __VA_ARGS__. See +// https://stackoverflow.com/questions/5134523/msvc-doesnt-expand-va-args-correctly +#define C10_EXPAND_MSVC_WORKAROUND(x) x + +// On nvcc, C10_UNLIKELY thwarts missing return statement analysis. In cases +// where the unlikely expression may be a constant, use this macro to ensure +// return statement analysis keeps working (at the cost of not getting the +// likely/unlikely annotation on nvcc). +// https://github.com/pytorch/pytorch/issues/21418 +// +// Currently, this is only used in the error reporting macros below. If you +// want to use it more generally, move me to Macros.h +// +// TODO: Brian Vaughan observed that we might be able to get this to work on +// nvcc by writing some sort of C++ overload that distinguishes constexpr inputs +// from non-constexpr. Since there isn't any evidence that losing C10_UNLIKELY +// in nvcc is causing us perf problems, this is not yet implemented, but this +// might be an interesting piece of C++ code for an intrepid bootcamper to +// write. +#if defined(__CUDACC__) +#define C10_UNLIKELY_OR_CONST(e) e +#else +#define C10_UNLIKELY_OR_CONST(e) C10_UNLIKELY(e) +#endif + +// ---------------------------------------------------------------------------- +// Error reporting macros +// ---------------------------------------------------------------------------- + +#ifdef STRIP_ERROR_MESSAGES +#define TORCH_RETHROW(e, ...) throw +#else +#define TORCH_RETHROW(e, ...) \ + do { \ + e.add_context(::c10::str(__VA_ARGS__)); \ + throw; \ + } while (false) +#endif + +// A utility macro to provide assert()-like functionality; that is, enforcement +// of internal invariants in code. It supports an arbitrary number of extra +// arguments (evaluated only on failure), which will be printed in the assert +// failure message using operator<< (this is useful to print some variables +// which may be useful for debugging.) 
+// +// Usage: +// TORCH_INTERNAL_ASSERT(should_be_true); +// TORCH_INTERNAL_ASSERT(x == 0, "x = ", x); +// +// Assuming no bugs in PyTorch, the conditions tested by this macro should +// always be true; e.g., it should be possible to disable all of these +// conditions without changing observable user behavior. If you would like to +// do error reporting for user input, please use TORCH_CHECK instead. +// +// NOTE: It is SAFE to use this macro in production code; on failure, this +// simply raises an exception, it does NOT unceremoniously quit the process +// (unlike assert()). +// +#ifdef STRIP_ERROR_MESSAGES +#define TORCH_INTERNAL_ASSERT(cond, ...) \ + if (C10_UNLIKELY_OR_CONST(!(cond))) { \ + ::c10::detail::torchCheckFail( \ + __func__, \ + __FILE__, \ + static_cast(__LINE__), \ + #cond " INTERNAL ASSERT FAILED at " C10_STRINGIZE(__FILE__)); \ + } +#else +// It would be nice if we could build a combined string literal out of +// the TORCH_INTERNAL_ASSERT prefix and a user-provided string literal +// as the first argument, but there doesn't seem to be any good way to +// do that while still supporting having a first argument that isn't a +// string literal. +#define TORCH_INTERNAL_ASSERT(cond, ...) \ + if (C10_UNLIKELY_OR_CONST(!(cond))) { \ + ::c10::detail::torchInternalAssertFail( \ + __func__, \ + __FILE__, \ + static_cast(__LINE__), \ + #cond \ + " INTERNAL ASSERT FAILED at " C10_STRINGIZE(__FILE__) ":" C10_STRINGIZE( \ + __LINE__) ", please report a bug to PyTorch. ", \ + c10::str(__VA_ARGS__)); \ + } +#endif + +// A utility macro to make it easier to test for error conditions from user +// input. Like TORCH_INTERNAL_ASSERT, it supports an arbitrary number of extra +// arguments (evaluated only on failure), which will be printed in the error +// message using operator<< (e.g., you can pass any object which has +// operator<< defined. Most objects in PyTorch have these definitions!) +// +// Usage: +// TORCH_CHECK(should_be_true); // A default error message will be provided +// // in this case; but we recommend writing an +// // explicit error message, as it is more +// // user friendly. +// TORCH_CHECK(x == 0, "Expected x to be 0, but got ", x); +// +// On failure, this macro will raise an exception. If this exception propagates +// to Python, it will convert into a Python RuntimeError. +// +// NOTE: It is SAFE to use this macro in production code; on failure, this +// simply raises an exception, it does NOT unceremoniously quit the process +// (unlike CHECK() from glog.) +// +#define TORCH_CHECK_WITH(error_t, cond, ...) \ + TORCH_CHECK_WITH_MSG(error_t, cond, "", __VA_ARGS__) + +#ifdef STRIP_ERROR_MESSAGES +#define TORCH_CHECK_MSG(cond, type, ...) \ + (#cond #type " CHECK FAILED at " C10_STRINGIZE(__FILE__)) +#define TORCH_CHECK_WITH_MSG(error_t, cond, type, ...) \ + if (C10_UNLIKELY_OR_CONST(!(cond))) { \ + C10_THROW_ERROR(Error, TORCH_CHECK_MSG(cond, type, __VA_ARGS__)); \ + } +#else + +namespace c10::detail { +template +decltype(auto) torchCheckMsgImpl(const char* /*msg*/, const Args&... args) { + return ::c10::str(args...); +} +inline C10_API const char* torchCheckMsgImpl(const char* msg) { + return msg; +} +// If there is just 1 user-provided C-string argument, use it. +inline C10_API const char* torchCheckMsgImpl( + const char* /*msg*/, + const char* args) { + return args; +} +} // namespace c10::detail + +#define TORCH_CHECK_MSG(cond, type, ...) \ + (::c10::detail::torchCheckMsgImpl( \ + "Expected " #cond \ + " to be true, but got false. 
" \ + "(Could this error message be improved? If so, " \ + "please report an enhancement request to PyTorch.)", \ + ##__VA_ARGS__)) +#define TORCH_CHECK_WITH_MSG(error_t, cond, type, ...) \ + if (C10_UNLIKELY_OR_CONST(!(cond))) { \ + C10_THROW_ERROR(error_t, TORCH_CHECK_MSG(cond, type, __VA_ARGS__)); \ + } +#endif + +namespace c10::detail { + +[[noreturn]] C10_API void torchCheckFail( + const char* func, + const char* file, + uint32_t line, + const std::string& msg); +[[noreturn]] C10_API void torchCheckFail( + const char* func, + const char* file, + uint32_t line, + const char* msg); + +// The c10::str() call that creates userMsg can have 1 of 3 return +// types depending on the number and types of arguments passed to +// TORCH_INTERNAL_ASSERT. 0 arguments will get a +// CompileTimeEmptyString, 1 const char * will be passed straight +// through, and anything else will get converted to std::string. +[[noreturn]] C10_API void torchInternalAssertFail( + const char* func, + const char* file, + uint32_t line, + const char* condMsg, + const char* userMsg); +[[noreturn]] inline C10_API void torchInternalAssertFail( + const char* func, + const char* file, + uint32_t line, + const char* condMsg, + ::c10::detail::CompileTimeEmptyString /*userMsg*/) { + torchCheckFail(func, file, line, condMsg); +} +[[noreturn]] C10_API void torchInternalAssertFail( + const char* func, + const char* file, + uint32_t line, + const char* condMsg, + const std::string& userMsg); + +} // namespace c10::detail + +#ifdef STANDALONE_TORCH_HEADER + +// TORCH_CHECK throws std::runtime_error instead of c10::Error which is +// useful when certain headers are used in a libtorch-independent way, +// e.g. when Vectorized is used in AOTInductor generated code. +#ifdef STRIP_ERROR_MESSAGES +#define TORCH_CHECK(cond, ...) \ + if (C10_UNLIKELY_OR_CONST(!(cond))) { \ + throw std::runtime_error(TORCH_CHECK_MSG( \ + cond, \ + "", \ + __func__, \ + ", ", \ + __FILE__, \ + ":", \ + __LINE__, \ + ", ", \ + __VA_ARGS__)); \ + } +#else +#define TORCH_CHECK(cond, ...) \ + if (C10_UNLIKELY_OR_CONST(!(cond))) { \ + throw std::runtime_error(TORCH_CHECK_MSG( \ + cond, \ + "", \ + __func__, \ + ", ", \ + __FILE__, \ + ":", \ + __LINE__, \ + ", ", \ + ##__VA_ARGS__)); \ + } +#endif + +#else + +#ifdef STRIP_ERROR_MESSAGES +#define TORCH_CHECK(cond, ...) \ + if (C10_UNLIKELY_OR_CONST(!(cond))) { \ + ::c10::detail::torchCheckFail( \ + __func__, \ + __FILE__, \ + static_cast(__LINE__), \ + TORCH_CHECK_MSG(cond, "", __VA_ARGS__)); \ + } +#else +#define TORCH_CHECK(cond, ...) \ + if (C10_UNLIKELY_OR_CONST(!(cond))) { \ + ::c10::detail::torchCheckFail( \ + __func__, \ + __FILE__, \ + static_cast(__LINE__), \ + TORCH_CHECK_MSG(cond, "", ##__VA_ARGS__)); \ + } +#endif + +#endif + +// An utility macro that does what `TORCH_CHECK` does if compiled in the host +// code, otherwise does nothing. Supposed to be used in the code shared between +// host and device code as an alternative for `TORCH_CHECK`. +#if defined(__CUDACC__) || defined(__HIPCC__) +#define TORCH_CHECK_IF_NOT_ON_CUDA(cond, ...) +#else +#define TORCH_CHECK_IF_NOT_ON_CUDA(cond, ...) TORCH_CHECK(cond, ##__VA_ARGS__) +#endif + +// Debug only version of TORCH_INTERNAL_ASSERT. This macro only checks in debug +// build, and does nothing in release build. It is appropriate to use +// in situations where you want to add an assert to a hotpath, but it is +// too expensive to run this assert on production builds. +#ifdef NDEBUG +// Optimized version - generates no code. 
+#define TORCH_INTERNAL_ASSERT_DEBUG_ONLY(...) \ + while (false) \ + C10_EXPAND_MSVC_WORKAROUND(TORCH_INTERNAL_ASSERT(__VA_ARGS__)) +#else +#define TORCH_INTERNAL_ASSERT_DEBUG_ONLY(...) \ + C10_EXPAND_MSVC_WORKAROUND(TORCH_INTERNAL_ASSERT(__VA_ARGS__)) +#endif + +// TODO: We're going to get a lot of similar looking string literals +// this way; check if this actually affects binary size. + +// Like TORCH_CHECK, but raises LinAlgError instead of Error. +#define TORCH_CHECK_LINALG(cond, ...) \ + TORCH_CHECK_WITH_MSG(LinAlgError, cond, "LINALG", __VA_ARGS__) + +// Like TORCH_CHECK, but raises IndexErrors instead of Errors. +#define TORCH_CHECK_INDEX(cond, ...) \ + TORCH_CHECK_WITH_MSG(IndexError, cond, "INDEX", __VA_ARGS__) + +// Like TORCH_CHECK, but raises ValueErrors instead of Errors. +#define TORCH_CHECK_VALUE(cond, ...) \ + TORCH_CHECK_WITH_MSG(ValueError, cond, "VALUE", __VA_ARGS__) + +// Like TORCH_CHECK, but raises TypeErrors instead of Errors. +#define TORCH_CHECK_TYPE(cond, ...) \ + TORCH_CHECK_WITH_MSG(TypeError, cond, "TYPE", __VA_ARGS__) + +// Like TORCH_CHECK, but raises NotImplementedErrors instead of Errors. +#define TORCH_CHECK_NOT_IMPLEMENTED(cond, ...) \ + TORCH_CHECK_WITH_MSG(NotImplementedError, cond, "TYPE", __VA_ARGS__) + +#define TORCH_CHECK_ALWAYS_SHOW_CPP_STACKTRACE(cond, ...) \ + TORCH_CHECK_WITH_MSG( \ + ErrorAlwaysShowCppStacktrace, cond, "TYPE", ##__VA_ARGS__) + +#ifdef STRIP_ERROR_MESSAGES +#define WARNING_MESSAGE_STRING(...) \ + ::c10::detail::CompileTimeEmptyString {} +#else +#define WARNING_MESSAGE_STRING(...) ::c10::str(__VA_ARGS__) +#endif + +// Report a warning to the user. Accepts an arbitrary number of extra +// arguments which are concatenated into the warning message using operator<< +// +#ifdef DISABLE_WARN +#define _TORCH_WARN_WITH(...) ((void)0); +#else +#define _TORCH_WARN_WITH(warning_t, ...) \ + ::c10::warn(::c10::Warning( \ + warning_t(), \ + {__func__, __FILE__, static_cast(__LINE__)}, \ + WARNING_MESSAGE_STRING(__VA_ARGS__), \ + false)); +#endif + +#define TORCH_WARN(...) _TORCH_WARN_WITH(::c10::UserWarning, __VA_ARGS__); + +#define TORCH_WARN_DEPRECATION(...) \ + _TORCH_WARN_WITH(::c10::DeprecationWarning, __VA_ARGS__); + +// Report a warning to the user only once. Accepts an arbitrary number of extra +// arguments which are concatenated into the warning message using operator<< +// +#define _TORCH_WARN_ONCE(...) \ + [[maybe_unused]] static const auto C10_ANONYMOUS_VARIABLE( \ + torch_warn_once_) = [&] { \ + TORCH_WARN(__VA_ARGS__); \ + return true; \ + }() + +#ifdef DISABLE_WARN +#define TORCH_WARN_ONCE(...) ((void)0); +#else +#define TORCH_WARN_ONCE(...) \ + if (::c10::WarningUtils::get_warnAlways()) { \ + TORCH_WARN(__VA_ARGS__); \ + } else { \ + _TORCH_WARN_ONCE(__VA_ARGS__); \ + } +#endif + +// Report an error with a specific argument +// NOTE: using the argument name in TORCH_CHECK's message is preferred +#define TORCH_CHECK_ARG(cond, argN, ...) 
\ + TORCH_CHECK(cond, "invalid argument ", argN, ": ", __VA_ARGS__) + +// ---------------------------------------------------------------------------- +// Deprecated macros +// ---------------------------------------------------------------------------- + +namespace c10::detail { + +/* +// Deprecation disabled until we fix sites in our codebase +[[deprecated("AT_ERROR(msg) is deprecated, use TORCH_CHECK(false, msg) +instead.")]] +*/ +inline void deprecated_AT_ERROR() {} + +/* +// Deprecation disabled until we fix sites in our codebase +[[deprecated("AT_ASSERT is deprecated, if you mean to indicate an +internal invariant failure, use " \ + "TORCH_INTERNAL_ASSERT instead; if you mean to do user +error checking, use " \ "TORCH_CHECK. See +https://github.com/pytorch/pytorch/issues/20287 for more details.")]] +*/ +inline void deprecated_AT_ASSERT() {} + +/* +// Deprecation disabled until we fix sites in our codebase +[[deprecated("AT_ASSERTM is deprecated, if you mean to indicate an +internal invariant failure, use " \ + "TORCH_INTERNAL_ASSERT instead; if you mean to do user +error checking, use " \ "TORCH_CHECK. See +https://github.com/pytorch/pytorch/issues/20287 for more details.")]] +*/ +inline void deprecated_AT_ASSERTM() {} + +} // namespace c10::detail + +// Deprecated alias; this alias was deprecated because people kept mistakenly +// using it for user error checking. Use TORCH_INTERNAL_ASSERT or TORCH_CHECK +// instead. See https://github.com/pytorch/pytorch/issues/20287 for more +// details. +#define AT_ASSERT(...) \ + do { \ + ::c10::detail::deprecated_AT_ASSERT(); \ + C10_EXPAND_MSVC_WORKAROUND(TORCH_INTERNAL_ASSERT(__VA_ARGS__)); \ + } while (false) + +// Deprecated alias, like AT_ASSERT. The new TORCH_INTERNAL_ASSERT macro +// supports both 0-ary and variadic calls, so having a separate +// message-accepting macro is not necessary. +// +// NB: we MUST include cond explicitly here, as MSVC will miscompile the macro +// expansion, shunting all of __VA_ARGS__ to cond. An alternate workaround +// can be seen at +// https://stackoverflow.com/questions/5134523/msvc-doesnt-expand-va-args-correctly +#define AT_ASSERTM(cond, ...) \ + do { \ + ::c10::detail::deprecated_AT_ASSERTM(); \ + C10_EXPAND_MSVC_WORKAROUND(TORCH_INTERNAL_ASSERT(cond, __VA_ARGS__)); \ + } while (false) + +// Deprecated alias; this alias was deprecated because it represents extra API +// surface that makes it hard for people to understand what macro to use. +// Use TORCH_CHECK(false, ...) or TORCH_INTERNAL_ASSERT(false, ...) to +// unconditionally fail at a line of code. +#define AT_ERROR(...) \ + do { \ + ::c10::detail::deprecated_AT_ERROR(); \ + C10_EXPAND_MSVC_WORKAROUND(TORCH_CHECK(false, ::c10::str(__VA_ARGS__))); \ + } while (false) + +#endif // C10_UTIL_EXCEPTION_H_ diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/ExclusivelyOwned.h b/phivenv/Lib/site-packages/torch/include/c10/util/ExclusivelyOwned.h new file mode 100644 index 0000000000000000000000000000000000000000..62a7cca47da91c98494b0006dea29d153746ecbe --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/ExclusivelyOwned.h @@ -0,0 +1,140 @@ +#pragma once + +#include + +namespace c10 { + +// See example implementation in TensorBase.h and TensorBody.h. +// Synopsis: +// +// repr_type -- type to use to store an owned T in ExclusivelyOwned. +// +// pointer_type -- pointer-esque type to return from +// ExclusivelyOwned's get() and operator*() methods. 
+// +// const_pointer_type -- similar to pointer_type, used for the const methods. +// +// static repr_type nullRepr() -- return a null instance of repr_type. +// +// template +// static repr_type createInPlace(Args&&... args) -- used by the in-place +// ExclusivelyOwned constructor. +// +// static repr_type moveToRepr(T&& x) -- move the given x into an +// instance of repr_type. used by the ExclusivelyOwned(T&&) +// constructor. +// +// static void destroyOwned(repr_type x) -- free memory for a +// known-exclusively-owned instance of x. Replaces calling repr_type's +// destructor. Being able to implement this more efficiently than +// repr_type's destructor is the main reason to use ExclusivelyOwned +// for a type. +// +// static T take(repr_type&) -- move out of the given repr_type into an owned T. +// +// static pointer_type getImpl(const repr_type&) -- return a pointer +// to the given repr_type. May take repr_type by value if that is more +// efficient. +template +struct ExclusivelyOwnedTraits; + +/// ExclusivelyOwned is a smart-pointer-like wrapper around an +/// exclusively-owned instance of some type T that normally has +/// mandatory reference counting (currently just Tensor). If you have +/// an isolated piece of code that knows that it has sole ownership of +/// an object of one of these types (i.e., because you created it +/// directly or using a factory function) and that object will not +/// escape from that isolated piece of code, then moving the object +/// into an ExclusivelyOwned will avoid an atomic reference count +/// decrement at destruction time. +/// +/// If you directly create the Tensor in the first +/// place, you can use the in_place constructor of ExclusivelyOwned to +/// additionally avoid doing any stores to initialize the refcount & +/// weakcount. +template +class ExclusivelyOwned { + using EOT = ExclusivelyOwnedTraits; + typename ExclusivelyOwnedTraits::repr_type repr_; + + public: + ExclusivelyOwned() : repr_(EOT::nullRepr()) {} + + explicit ExclusivelyOwned(T&& t) : repr_(EOT::moveToRepr(std::move(t))) {} + + template + explicit ExclusivelyOwned(std::in_place_t, Args&&... args) + : repr_(EOT::createInPlace(std::forward(args)...)) {} + + ExclusivelyOwned(const ExclusivelyOwned&) = delete; + + ExclusivelyOwned(ExclusivelyOwned&& rhs) noexcept + : repr_(std::move(rhs.repr_)) { + rhs.repr_ = EOT::nullRepr(); + } + + ExclusivelyOwned& operator=(const ExclusivelyOwned&) = delete; + + ExclusivelyOwned& operator=(ExclusivelyOwned&& rhs) noexcept { + EOT::destroyOwned(repr_); + repr_ = std::move(rhs.repr_); + rhs.repr_ = EOT::nullRepr(); + return *this; + } + + ExclusivelyOwned& operator=(T&& rhs) noexcept { + EOT::destroyOwned(repr_); + repr_ = EOT::moveToRepr(std::move(rhs)); + return *this; + } + + ~ExclusivelyOwned() { + EOT::destroyOwned(repr_); + // Don't bother to call the destructor of repr_, since we already + // did specialized destruction for the exclusively-owned case in + // destroyOwned! + } + + // We don't provide this because it would require us to be able to + // differentiate an owned-but-empty T from a lack of T. This is + // particularly problematic for Tensor, which wants to use an + // undefined Tensor as its null state. + explicit operator bool() const noexcept = delete; + + operator T() && { + return take(); + } + + // NOTE: the equivalent operation on MaybeOwned is a moving + // operator*. For ExclusivelyOwned, take() and operator*() may well + // have different return types, so they are different functions. 
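+  //
+  // A usage sketch (assumes an existing at::Tensor `t` that this code
+  // exclusively owns):
+  //
+  //   c10::ExclusivelyOwned<at::Tensor> owned(std::move(t));
+  //   owned->defined();                           // borrow without refcounting
+  //   at::Tensor back = std::move(owned).take();  // move ownership back out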
+ T take() && { + return EOT::take(repr_); + } + + typename EOT::const_pointer_type operator->() const { + return get(); + } + + typename EOT::const_pointer_type get() const { + return EOT::getImpl(repr_); + } + + typename EOT::pointer_type operator->() { + return get(); + } + + typename EOT::pointer_type get() { + return EOT::getImpl(repr_); + } + + std::remove_pointer_t& operator*() const { + return *get(); + } + + std::remove_pointer_t& operator*() { + return *get(); + } +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/ExclusivelyOwnedTensorTraits.h b/phivenv/Lib/site-packages/torch/include/c10/util/ExclusivelyOwnedTensorTraits.h new file mode 100644 index 0000000000000000000000000000000000000000..4d61440a7b4b1b6f8a38e12de0799822c6d4223b --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/ExclusivelyOwnedTensorTraits.h @@ -0,0 +1,75 @@ +#pragma once + +#include +#include + +#include + +namespace c10 { +// Shared ExclusivelyOwnedTraits implementation between caffe2::Tensor and +// at::TensorBase. +template +struct ExclusivelyOwnedTensorTraits { + using repr_type = TensorType; + using pointer_type = TensorType*; + using const_pointer_type = const TensorType*; + + static repr_type nullRepr() { + return TensorType(); + } + + template + static repr_type createInPlace(Args&&... args) { + return TensorType(std::forward(args)...); + } + + static repr_type moveToRepr(TensorType&& x) { + return std::move(x); + } + + static void destroyOwned(TensorType& x) { + TensorImpl* const toDestroy = x.unsafeReleaseTensorImpl(); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + toDestroy != nullptr, "Tensor somehow got null TensorImpl?"); + // May be 0 because UndefinedTensorImpl doesn't get its refcount + // incremented. + const bool isUndefined = toDestroy == UndefinedTensorImpl::singleton(); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + toDestroy->refcount_ == 1 || (toDestroy->refcount_ == 0 && isUndefined), + "ExclusivelyOwned destroyed with isUndefined ", + isUndefined, + " and refcount ", + toDestroy->refcount_, + ", expected 1 or, if isUndefined, 0!"); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + toDestroy->weakcount_ == 1 || + (toDestroy->weakcount_ == 0 && + toDestroy == UndefinedTensorImpl::singleton()), + "ExclusivelyOwned destroyed with isUndefined ", + isUndefined, + " and weakcount ", + toDestroy->weakcount_, + ", expected 1 or, if isUndefined, 0!"); + if (!isUndefined) { +#ifndef NDEBUG + // Needed to pass the debug assertions in ~intrusive_ptr_target. + toDestroy->refcount_ = 0; + toDestroy->weakcount_ = 0; +#endif + delete toDestroy; + } + } + + static TensorType take(TensorType& x) { + return std::move(x); + } + + static pointer_type getImpl(repr_type& x) { + return &x; + } + + static const_pointer_type getImpl(const repr_type& x) { + return &x; + } +}; +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/FbcodeMaps.h b/phivenv/Lib/site-packages/torch/include/c10/util/FbcodeMaps.h new file mode 100644 index 0000000000000000000000000000000000000000..3b8abdbcfbd99deb2842bd00bd9dddcd6b2713e1 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/FbcodeMaps.h @@ -0,0 +1,29 @@ +#ifndef C10_UTIL_FBCODEMAPS_H_ +#define C10_UTIL_FBCODEMAPS_H_ + +// Map typedefs so that we can use folly's F14 maps in fbcode without +// taking a folly dependency. 
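+//
+// For example (a sketch; the key/value types are arbitrary):
+//
+//   c10::FastMap<std::string, int> op_counts;
+//   op_counts["conv2d"] += 1; // F14FastMap in fbcode, std::unordered_map otherwise
+//   c10::FastSet<int> seen;
+//   seen.insert(42);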
+
+#ifdef FBCODE_CAFFE2
+#include <folly/container/F14Map.h>
+#include <folly/container/F14Set.h>
+#else
+#include <unordered_map>
+#include <unordered_set>
+#endif
+
+namespace c10 {
+#ifdef FBCODE_CAFFE2
+template <typename Key, typename Value>
+using FastMap = folly::F14FastMap<Key, Value>;
+template <typename Key>
+using FastSet = folly::F14FastSet<Key>;
+#else
+template <typename Key, typename Value>
+using FastMap = std::unordered_map<Key, Value>;
+template <typename Key>
+using FastSet = std::unordered_set<Key>;
+#endif
+} // namespace c10
+
+#endif // C10_UTIL_FBCODEMAPS_H_
diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Flags.h b/phivenv/Lib/site-packages/torch/include/c10/util/Flags.h
new file mode 100644
index 0000000000000000000000000000000000000000..70fd62988b69a187a04e0baa13e1888422e4c60d
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/c10/util/Flags.h
@@ -0,0 +1,242 @@
+#ifndef C10_UTIL_FLAGS_H_
+#define C10_UTIL_FLAGS_H_
+
+/* Commandline flags support for C10.
+ *
+ * This is a portable commandline flags tool for c10, so we can optionally
+ * choose to use gflags or a lightweight custom implementation if gflags is
+ * not possible on a certain platform. If you have gflags installed, setting
+ * the macro C10_USE_GFLAGS will seamlessly route everything to gflags.
+ *
+ * To define a flag foo of type bool defaulting to true, do the following in
+ * the *global* namespace:
+ *     C10_DEFINE_bool(foo, true, "An example.");
+ *
+ * To use it in another .cc file, you can use C10_DECLARE_* as follows:
+ *     C10_DECLARE_bool(foo);
+ *
+ * In both cases, you can then access the flag via FLAGS_foo.
+ *
+ * It is recommended that you build with gflags. To learn more about the flags
+ * usage, refer to the gflags page here:
+ *
+ * https://gflags.github.io/gflags/
+ *
+ * Note about Python users / devs: gflags is initiated from a C++ function
+ * ParseCommandLineFlags, and is usually done in native binaries in the main
+ * function. As Python does not have a modifiable main function, it is usually
+ * difficult to change the flags after Python starts. Hence, it is recommended
+ * that one sets the default value of the flags to one that's acceptable in
+ * general - that will allow Python to run without wrong flags.
+ */
+
+#include <c10/macros/Export.h>
+#include <string>
+
+#include <c10/util/Registry.h>
+
+namespace c10 {
+/**
+ * Sets the usage message when a commandline tool is called with "--help".
+ */
+C10_API void SetUsageMessage(const std::string& str);
+
+/**
+ * Returns the usage message for the commandline tool set by SetUsageMessage.
+ */
+C10_API const char* UsageMessage();
+
+/**
+ * Parses the commandline flags.
+ *
+ * This command parses all the commandline arguments passed in via pargc
+ * and argv. Once it is finished, pargc and argv will contain the remaining
+ * commandline args that c10 does not deal with. Note that following
+ * convention, argv[0] contains the binary name and is not parsed.
+ */
+C10_API bool ParseCommandLineFlags(int* pargc, char*** pargv);
+
+/**
+ * Checks if the commandline flags have already been parsed.
+ */
+C10_API bool CommandLineFlagsHasBeenParsed();
+
+} // namespace c10
+
+////////////////////////////////////////////////////////////////////////////////
+// Below are gflags and non-gflags specific implementations.
+// In general, they define the following macros for one to declare (use
+// C10_DECLARE) or define (use C10_DEFINE) flags:
+// C10_{DECLARE,DEFINE}_{int,int64,double,bool,string}
+////////////////////////////////////////////////////////////////////////////////
+
+#ifdef C10_USE_GFLAGS
+
+////////////////////////////////////////////////////////////////////////////////
+// Begin gflags section: most functions are basically rerouted to gflags.
+//////////////////////////////////////////////////////////////////////////////// +#include + +// C10 uses hidden visibility by default. However, in gflags, it only uses +// export on Windows platform (with dllexport) but not on linux/mac (with +// default visibility). As a result, to ensure that we are always exporting +// global variables, we will redefine the GFLAGS_DLL_DEFINE_FLAG macro if we +// are building C10 as a shared library. +// This has to be done after the inclusion of gflags, because some early +// versions of gflags.h (e.g. 2.0 on ubuntu 14.04) directly defines the +// macros, so we need to do definition after gflags is done. +#ifdef GFLAGS_DLL_DEFINE_FLAG +#undef GFLAGS_DLL_DEFINE_FLAG +#endif // GFLAGS_DLL_DEFINE_FLAG +#ifdef GFLAGS_DLL_DECLARE_FLAG +#undef GFLAGS_DLL_DECLARE_FLAG +#endif // GFLAGS_DLL_DECLARE_FLAG +#define GFLAGS_DLL_DEFINE_FLAG C10_EXPORT +#define GFLAGS_DLL_DECLARE_FLAG C10_IMPORT + +// gflags before 2.0 uses namespace google and after 2.1 uses namespace gflags. +// Using GFLAGS_GFLAGS_H_ to capture this change. +#ifndef GFLAGS_GFLAGS_H_ +namespace gflags = google; +#endif // GFLAGS_GFLAGS_H_ + +// Motivation about the gflags wrapper: +// (1) We would need to make sure that the gflags version and the non-gflags +// version of C10 are going to expose the same flags abstraction. One should +// explicitly use FLAGS_flag_name to access the flags. +// (2) For flag names, it is recommended to start with c10_ to distinguish it +// from regular gflags flags. For example, do +// C10_DEFINE_BOOL(c10_my_flag, true, "An example"); +// to allow one to use FLAGS_c10_my_flag. +// (3) Gflags has a design issue that does not properly expose the global flags, +// if one builds the library with -fvisibility=hidden. The current gflags (as of +// Aug 2018) only deals with the Windows case using dllexport, and not the Linux +// counterparts. As a result, we will explicitly use C10_EXPORT to export the +// flags defined in C10. This is done via a global reference, so the flag +// itself is not duplicated - under the hood it is the same global gflags flag. +#define C10_GFLAGS_DEF_WRAPPER(type, real_type, name, default_value, help_str) \ + DEFINE_##type(name, default_value, help_str); + +#define C10_DEFINE_int(name, default_value, help_str) \ + C10_GFLAGS_DEF_WRAPPER(int32, gflags::int32, name, default_value, help_str) +#define C10_DEFINE_int32(name, default_value, help_str) \ + C10_DEFINE_int(name, default_value, help_str) +#define C10_DEFINE_int64(name, default_value, help_str) \ + C10_GFLAGS_DEF_WRAPPER(int64, gflags::int64, name, default_value, help_str) +#define C10_DEFINE_double(name, default_value, help_str) \ + C10_GFLAGS_DEF_WRAPPER(double, double, name, default_value, help_str) +#define C10_DEFINE_bool(name, default_value, help_str) \ + C10_GFLAGS_DEF_WRAPPER(bool, bool, name, default_value, help_str) +#define C10_DEFINE_string(name, default_value, help_str) \ + C10_GFLAGS_DEF_WRAPPER(string, ::fLS::clstring, name, default_value, help_str) + +// DECLARE_typed_var should be used in header files and in the global namespace. 
+#define C10_GFLAGS_DECLARE_WRAPPER(type, real_type, name) DECLARE_##type(name); + +#define C10_DECLARE_int(name) \ + C10_GFLAGS_DECLARE_WRAPPER(int32, gflags::int32, name) +#define C10_DECLARE_int32(name) C10_DECLARE_int(name) +#define C10_DECLARE_int64(name) \ + C10_GFLAGS_DECLARE_WRAPPER(int64, gflags::int64, name) +#define C10_DECLARE_double(name) \ + C10_GFLAGS_DECLARE_WRAPPER(double, double, name) +#define C10_DECLARE_bool(name) C10_GFLAGS_DECLARE_WRAPPER(bool, bool, name) +#define C10_DECLARE_string(name) \ + C10_GFLAGS_DECLARE_WRAPPER(string, ::fLS::clstring, name) + +#define TORCH_DECLARE_int(name) C10_DECLARE_int(name) +#define TORCH_DECLARE_int32(name) C10_DECLARE_int32(name) +#define TORCH_DECLARE_int64(name) C10_DECLARE_int64(name) +#define TORCH_DECLARE_double(name) C10_DECLARE_double(name) +#define TORCH_DECLARE_bool(name) C10_DECLARE_bool(name) +#define TORCH_DECLARE_string(name) C10_DECLARE_string(name) + +//////////////////////////////////////////////////////////////////////////////// +// End gflags section. +//////////////////////////////////////////////////////////////////////////////// + +#else // C10_USE_GFLAGS + +//////////////////////////////////////////////////////////////////////////////// +// Begin non-gflags section: providing equivalent functionality. +//////////////////////////////////////////////////////////////////////////////// + +namespace c10 { + +class C10_API C10FlagParser { + public: + bool success() { + return success_; + } + + protected: + template + bool Parse(const std::string& content, T* value); + bool success_{false}; +}; + +C10_DECLARE_REGISTRY(C10FlagsRegistry, C10FlagParser, const std::string&); + +} // namespace c10 + +// The macros are defined outside the c10 namespace. In your code, you should +// write the C10_DEFINE_* and C10_DECLARE_* macros outside any namespace +// as well. + +#define C10_DEFINE_typed_var(type, name, default_value, help_str) \ + C10_EXPORT type FLAGS_##name = default_value; \ + namespace c10 { \ + namespace { \ + class C10FlagParser_##name : public C10FlagParser { \ + public: \ + explicit C10FlagParser_##name(const std::string& content) { \ + success_ = C10FlagParser::Parse(content, &FLAGS_##name); \ + } \ + }; \ + RegistererC10FlagsRegistry g_C10FlagsRegistry_##name( \ + #name, \ + C10FlagsRegistry(), \ + RegistererC10FlagsRegistry::DefaultCreator, \ + "(" #type ", default " #default_value ") " help_str); \ + } \ + } + +#define C10_DEFINE_int(name, default_value, help_str) \ + C10_DEFINE_typed_var(int, name, default_value, help_str) +#define C10_DEFINE_int32(name, default_value, help_str) \ + C10_DEFINE_int(name, default_value, help_str) +#define C10_DEFINE_int64(name, default_value, help_str) \ + C10_DEFINE_typed_var(int64_t, name, default_value, help_str) +#define C10_DEFINE_double(name, default_value, help_str) \ + C10_DEFINE_typed_var(double, name, default_value, help_str) +#define C10_DEFINE_bool(name, default_value, help_str) \ + C10_DEFINE_typed_var(bool, name, default_value, help_str) +#define C10_DEFINE_string(name, default_value, help_str) \ + C10_DEFINE_typed_var(std::string, name, default_value, help_str) + +// DECLARE_typed_var should be used in header files and in the global namespace. 
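+// For example (a sketch; `c10_example_flag` is a made-up flag name):
+//
+//   // foo.cc (global namespace):
+//   C10_DEFINE_bool(c10_example_flag, true, "An example flag.");
+//   // foo.h:
+//   C10_DECLARE_bool(c10_example_flag);
+//   // any .cc that includes foo.h:
+//   if (FLAGS_c10_example_flag) { /* ... */ }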
+#define C10_DECLARE_typed_var(type, name) C10_API extern type FLAGS_##name + +#define C10_DECLARE_int(name) C10_DECLARE_typed_var(int, name) +#define C10_DECLARE_int32(name) C10_DECLARE_int(name) +#define C10_DECLARE_int64(name) C10_DECLARE_typed_var(int64_t, name) +#define C10_DECLARE_double(name) C10_DECLARE_typed_var(double, name) +#define C10_DECLARE_bool(name) C10_DECLARE_typed_var(bool, name) +#define C10_DECLARE_string(name) C10_DECLARE_typed_var(std::string, name) + +#define TORCH_DECLARE_typed_var(type, name) TORCH_API extern type FLAGS_##name + +#define TORCH_DECLARE_int(name) TORCH_DECLARE_typed_var(int, name) +#define TORCH_DECLARE_int32(name) TORCH_DECLARE_int(name) +#define TORCH_DECLARE_int64(name) TORCH_DECLARE_typed_var(int64_t, name) +#define TORCH_DECLARE_double(name) TORCH_DECLARE_typed_var(double, name) +#define TORCH_DECLARE_bool(name) TORCH_DECLARE_typed_var(bool, name) +#define TORCH_DECLARE_string(name) TORCH_DECLARE_typed_var(std::string, name) + +//////////////////////////////////////////////////////////////////////////////// +// End non-gflags section. +//////////////////////////////////////////////////////////////////////////////// + +#endif // C10_USE_GFLAGS + +#endif // C10_UTIL_FLAGS_H_ diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Float4_e2m1fn_x2.h b/phivenv/Lib/site-packages/torch/include/c10/util/Float4_e2m1fn_x2.h new file mode 100644 index 0000000000000000000000000000000000000000..b3835aa53366ef63681e64dda09813b13f7c0537 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Float4_e2m1fn_x2.h @@ -0,0 +1,28 @@ +#pragma once +#include + +#include + +/// Defines the Float4_e2m1fn_x2 type (4-bit floating-point, two elements packed +/// into one byte). This is the FP4 dtype from the OCP MX format spec +/// (https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf, +/// Section 5.3.3) +/// +/// Given two high precision values val0 and val1, here is the +/// binary configuration of their packed representation, from MSB to LSB: +/// +/// original value | val1 : val0 +/// ======================================== +/// bit index (MSB==7, LSB==0) | 7654 : 3210 +/// sign/exponent/mantissa | seem : seem +/// + +namespace c10 { + +struct alignas(1) Float4_e2m1fn_x2 { + uint8_t val_; + Float4_e2m1fn_x2() = default; + C10_HOST_DEVICE explicit Float4_e2m1fn_x2(uint8_t val) : val_(val) {} +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e4m3fn-inl.h b/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e4m3fn-inl.h new file mode 100644 index 0000000000000000000000000000000000000000..27c57599aaf1be2b2e0f57511d6c0e557ee4bffa --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e4m3fn-inl.h @@ -0,0 +1,274 @@ +#pragma once + +#include +#include +#include + +C10_CLANG_DIAGNOSTIC_PUSH() +#if C10_CLANG_HAS_WARNING("-Wimplicit-int-float-conversion") +C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion") +#endif + +namespace c10 { + +/// Constructors + +inline C10_HOST_DEVICE Float8_e4m3fn::Float8_e4m3fn(float value) + : x(detail::fp8e4m3fn_from_fp32_value(value)) {} + +/// Implicit conversions + +inline C10_HOST_DEVICE Float8_e4m3fn::operator float() const { + return detail::fp8e4m3fn_to_fp32_value(x); +} + +/// Special values helper + +inline C10_HOST_DEVICE bool Float8_e4m3fn::isnan() const { + return (x & 0b01111111) == 0b01111111; +} + +/// Arithmetic + +inline C10_HOST_DEVICE Float8_e4m3fn +operator+(const Float8_e4m3fn& a, const 
Float8_e4m3fn& b) { + return static_cast(a) + static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e4m3fn +operator-(const Float8_e4m3fn& a, const Float8_e4m3fn& b) { + return static_cast(a) - static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e4m3fn +operator*(const Float8_e4m3fn& a, const Float8_e4m3fn& b) { + return static_cast(a) * static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e4m3fn operator/( + const Float8_e4m3fn& a, + const Float8_e4m3fn& b) __ubsan_ignore_float_divide_by_zero__ { + return static_cast(a) / static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e4m3fn operator-(const Float8_e4m3fn& a) { + return -static_cast(a); +} + +inline C10_HOST_DEVICE Float8_e4m3fn& operator+=( + Float8_e4m3fn& a, + const Float8_e4m3fn& b) { + a = a + b; + return a; +} + +inline C10_HOST_DEVICE Float8_e4m3fn& operator-=( + Float8_e4m3fn& a, + const Float8_e4m3fn& b) { + a = a - b; + return a; +} + +inline C10_HOST_DEVICE Float8_e4m3fn& operator*=( + Float8_e4m3fn& a, + const Float8_e4m3fn& b) { + a = a * b; + return a; +} + +inline C10_HOST_DEVICE Float8_e4m3fn& operator/=( + Float8_e4m3fn& a, + const Float8_e4m3fn& b) { + a = a / b; + return a; +} + +/// Arithmetic with floats + +inline C10_HOST_DEVICE float operator+(Float8_e4m3fn a, float b) { + return static_cast(a) + b; +} +inline C10_HOST_DEVICE float operator-(Float8_e4m3fn a, float b) { + return static_cast(a) - b; +} +inline C10_HOST_DEVICE float operator*(Float8_e4m3fn a, float b) { + return static_cast(a) * b; +} +inline C10_HOST_DEVICE float operator/(Float8_e4m3fn a, float b) + __ubsan_ignore_float_divide_by_zero__ { + return static_cast(a) / b; +} + +inline C10_HOST_DEVICE float operator+(float a, Float8_e4m3fn b) { + return a + static_cast(b); +} +inline C10_HOST_DEVICE float operator-(float a, Float8_e4m3fn b) { + return a - static_cast(b); +} +inline C10_HOST_DEVICE float operator*(float a, Float8_e4m3fn b) { + return a * static_cast(b); +} +inline C10_HOST_DEVICE float operator/(float a, Float8_e4m3fn b) + __ubsan_ignore_float_divide_by_zero__ { + return a / static_cast(b); +} + +inline C10_HOST_DEVICE float& operator+=(float& a, const Float8_e4m3fn& b) { + return a += static_cast(b); +} +inline C10_HOST_DEVICE float& operator-=(float& a, const Float8_e4m3fn& b) { + return a -= static_cast(b); +} +inline C10_HOST_DEVICE float& operator*=(float& a, const Float8_e4m3fn& b) { + return a *= static_cast(b); +} +inline C10_HOST_DEVICE float& operator/=(float& a, const Float8_e4m3fn& b) { + return a /= static_cast(b); +} + +/// Arithmetic with doubles + +inline C10_HOST_DEVICE double operator+(Float8_e4m3fn a, double b) { + return static_cast(a) + b; +} +inline C10_HOST_DEVICE double operator-(Float8_e4m3fn a, double b) { + return static_cast(a) - b; +} +inline C10_HOST_DEVICE double operator*(Float8_e4m3fn a, double b) { + return static_cast(a) * b; +} +inline C10_HOST_DEVICE double operator/(Float8_e4m3fn a, double b) + __ubsan_ignore_float_divide_by_zero__ { + return static_cast(a) / b; +} + +inline C10_HOST_DEVICE double operator+(double a, Float8_e4m3fn b) { + return a + static_cast(b); +} +inline C10_HOST_DEVICE double operator-(double a, Float8_e4m3fn b) { + return a - static_cast(b); +} +inline C10_HOST_DEVICE double operator*(double a, Float8_e4m3fn b) { + return a * static_cast(b); +} +inline C10_HOST_DEVICE double operator/(double a, Float8_e4m3fn b) + __ubsan_ignore_float_divide_by_zero__ { + return a / static_cast(b); +} + +/// Arithmetic with ints + +inline C10_HOST_DEVICE Float8_e4m3fn operator+(Float8_e4m3fn a, 
int b) { + return a + static_cast(b); +} +inline C10_HOST_DEVICE Float8_e4m3fn operator-(Float8_e4m3fn a, int b) { + return a - static_cast(b); +} +inline C10_HOST_DEVICE Float8_e4m3fn operator*(Float8_e4m3fn a, int b) { + return a * static_cast(b); +} +inline C10_HOST_DEVICE Float8_e4m3fn operator/(Float8_e4m3fn a, int b) { + return a / static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e4m3fn operator+(int a, Float8_e4m3fn b) { + return static_cast(a) + b; +} +inline C10_HOST_DEVICE Float8_e4m3fn operator-(int a, Float8_e4m3fn b) { + return static_cast(a) - b; +} +inline C10_HOST_DEVICE Float8_e4m3fn operator*(int a, Float8_e4m3fn b) { + return static_cast(a) * b; +} +inline C10_HOST_DEVICE Float8_e4m3fn operator/(int a, Float8_e4m3fn b) { + return static_cast(a) / b; +} + +//// Arithmetic with int64_t + +inline C10_HOST_DEVICE Float8_e4m3fn operator+(Float8_e4m3fn a, int64_t b) { + return a + static_cast(b); +} +inline C10_HOST_DEVICE Float8_e4m3fn operator-(Float8_e4m3fn a, int64_t b) { + return a - static_cast(b); +} +inline C10_HOST_DEVICE Float8_e4m3fn operator*(Float8_e4m3fn a, int64_t b) { + return a * static_cast(b); +} +inline C10_HOST_DEVICE Float8_e4m3fn operator/(Float8_e4m3fn a, int64_t b) { + return a / static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e4m3fn operator+(int64_t a, Float8_e4m3fn b) { + return static_cast(a) + b; +} +inline C10_HOST_DEVICE Float8_e4m3fn operator-(int64_t a, Float8_e4m3fn b) { + return static_cast(a) - b; +} +inline C10_HOST_DEVICE Float8_e4m3fn operator*(int64_t a, Float8_e4m3fn b) { + return static_cast(a) * b; +} +inline C10_HOST_DEVICE Float8_e4m3fn operator/(int64_t a, Float8_e4m3fn b) { + return static_cast(a) / b; +} + +/// NOTE: we do not define comparisons directly and instead rely on the implicit +/// conversion from c10::Float8_e4m3fn to float. 
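+///
+/// For example (a sketch), the following compares through float:
+///
+///   c10::Float8_e4m3fn a(1.0f), b(2.0f);
+///   bool lt = a < b; // both operands convert to float before comparing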
+
+} // namespace c10
+
+namespace std {
+
+template <>
+class numeric_limits<c10::Float8_e4m3fn> {
+ public:
+  static constexpr bool is_specialized = true;
+  static constexpr bool is_signed = true;
+  static constexpr bool is_integer = false;
+  static constexpr bool is_exact = false;
+  static constexpr bool has_infinity = false;
+  static constexpr bool has_quiet_NaN = true;
+  static constexpr bool has_signaling_NaN = false;
+  static constexpr auto has_denorm = true;
+  static constexpr auto has_denorm_loss = true;
+  static constexpr auto round_style = numeric_limits<float>::round_style;
+  static constexpr bool is_iec559 = false;
+  static constexpr bool is_bounded = true;
+  static constexpr bool is_modulo = false;
+  static constexpr int digits = 4;
+  static constexpr int digits10 = 0;
+  static constexpr int max_digits10 = 3;
+  static constexpr int radix = 2;
+  static constexpr int min_exponent = -5;
+  static constexpr int min_exponent10 = -1;
+  static constexpr int max_exponent = 8;
+  static constexpr int max_exponent10 = 2;
+  static constexpr auto traps = numeric_limits<float>::traps;
+  static constexpr auto tinyness_before = false;
+
+  static constexpr c10::Float8_e4m3fn min() {
+    return c10::Float8_e4m3fn(0x08, c10::Float8_e4m3fn::from_bits());
+  }
+  static constexpr c10::Float8_e4m3fn lowest() {
+    return c10::Float8_e4m3fn(0xFE, c10::Float8_e4m3fn::from_bits());
+  }
+  static constexpr c10::Float8_e4m3fn max() {
+    return c10::Float8_e4m3fn(0x7E, c10::Float8_e4m3fn::from_bits());
+  }
+  static constexpr c10::Float8_e4m3fn epsilon() {
+    return c10::Float8_e4m3fn(0x20, c10::Float8_e4m3fn::from_bits());
+  }
+  static constexpr c10::Float8_e4m3fn round_error() {
+    return c10::Float8_e4m3fn(0x30, c10::Float8_e4m3fn::from_bits());
+  }
+  static constexpr c10::Float8_e4m3fn quiet_NaN() {
+    return c10::Float8_e4m3fn(0x7F, c10::Float8_e4m3fn::from_bits());
+  }
+  static constexpr c10::Float8_e4m3fn denorm_min() {
+    return c10::Float8_e4m3fn(0x01, c10::Float8_e4m3fn::from_bits());
+  }
+};
+
+} // namespace std
+
+C10_CLANG_DIAGNOSTIC_POP()
diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e4m3fn.h b/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e4m3fn.h
new file mode 100644
index 0000000000000000000000000000000000000000..f563dd5914ea409abbf7c1e9275dc2559b658741
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e4m3fn.h
@@ -0,0 +1,240 @@
+#pragma once
+
+/// Defines the Float8_e4m3fn type (8-bit floating-point) including conversions
+/// to standard C types and basic arithmetic operations. Note that arithmetic
+/// operations are implemented by converting to floating point and
+/// performing the operation in float32.
+/// Binary configuration:
+/// s eeee mmm
+/// 1 sign bit
+/// 4 exponent bits
+/// 3 mantissa bits
+/// bias = 7
+///
+/// Implementation based on the paper https://arxiv.org/pdf/2209.05433.pdf
+/// and inspired by Half implementation from pytorch/c10/util/Half.h
+
+#include <c10/macros/Macros.h>
+#include <c10/util/floating_point_utils.h>
+
+#if defined(__cplusplus)
+#include <cmath>
+#include <cstdint>
+#elif !defined(__OPENCL_VERSION__)
+#include <math.h>
+#include <stdint.h>
+#endif
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+#include <climits>
+#include <iostream>
+
+namespace c10 {
+
+namespace detail {
+
+/*
+ * Convert an 8-bit floating-point number in fp8 E4M3FN format, in bit
+ * representation, to a 32-bit floating-point number in IEEE single-precision
+ * format, in bit representation.
+ *
+ * @note The implementation doesn't use any floating-point operations.
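+ *
+ * Worked example (a sketch): input 0x40 is 0 1000 000 in s/eeee/mmm layout,
+ * i.e. sign 0, biased exponent 8, mantissa 0; with bias 7 it decodes to
+ * 1.0 * 2^(8-7) = 2.0f.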
+ */ +inline C10_HOST_DEVICE float fp8e4m3fn_to_fp32_value(uint8_t input) { + /* + * Extend the fp8 E4M3FN number to 32 bits and shift to the + * upper part of the 32-bit word: + * +---+----+---+-----------------------------+ + * | S |EEEE|MMM|0000 0000 0000 0000 0000 0000| + * +---+----+---+-----------------------------+ + * Bits 31 27-30 24-26 0-23 + * + * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 + * - zero bits. + */ + const uint32_t w = (uint32_t)input << 24; + /* + * Extract the sign of the input number into the high bit of the 32-bit word: + * + * +---+----------------------------------+ + * | S |0000000 00000000 00000000 00000000| + * +---+----------------------------------+ + * Bits 31 0-31 + */ + const uint32_t sign = w & UINT32_C(0x80000000); + /* + * Extract mantissa and biased exponent of the input number into the bits 0-30 + * of the 32-bit word: + * + * +---+----+---+-----------------------------+ + * | S |EEEE|MMM|0000 0000 0000 0000 0000 0000| + * +---+----+---+-----------------------------+ + * Bits 31 27-30 24-26 0-23 + */ + const uint32_t nonsign = w & UINT32_C(0x7FFFFFFF); + /* + * Renorm shift is the number of bits to shift mantissa left to make the + * half-precision number normalized. If the initial number is normalized, some + * of its high 5 bits (sign == 0 and 4-bit exponent) equals one. In this case + * renorm_shift == 0. If the number is denormalize, renorm_shift > 0. Note + * that if we shift denormalized nonsign by renorm_shift, the unit bit of + * mantissa will shift into exponent, turning the biased exponent into 1, and + * making mantissa normalized (i.e. without leading 1). + */ +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) + uint32_t renorm_shift = __clz(nonsign); +#elif defined(__SYCL_DEVICE_ONLY__) + // Note: zero is not a supported input into `__builtin_clz` + uint32_t renorm_shift = + nonsign != 0 ? __builtin_clz(nonsign) : sizeof(uint32_t) * CHAR_BIT; +#elif defined(_MSC_VER) && !defined(__clang__) + unsigned long nonsign_bsr; + _BitScanReverse(&nonsign_bsr, (unsigned long)nonsign); + uint32_t renorm_shift = (uint32_t)nonsign_bsr ^ 31; +#else + // Note: zero is not a supported input into `__builtin_clz` + uint32_t renorm_shift = + nonsign != 0 ? __builtin_clz(nonsign) : sizeof(uint32_t) * CHAR_BIT; +#endif + renorm_shift = renorm_shift > 4 ? renorm_shift - 4 : 0; + /* + * Iff fp8e4m3fn number has all exponent and mantissa bits set to 1, + * the addition overflows it into bit 31, and the subsequent shift turns the + * high 9 bits into 1. Thus inf_nan_mask == 0x7F800000 if the fp8e4m3fn number + * is Nan, 0x00000000 otherwise + */ + const int32_t inf_nan_mask = + ((int32_t)(nonsign + 0x01000000) >> 8) & INT32_C(0x7F800000); + /* + * Iff nonsign is 0, it overflows into 0xFFFFFFFF, turning bit 31 + * into 1. Otherwise, bit 31 remains 0. The signed shift right by 31 + * broadcasts bit 31 into all bits of the zero_mask. Thus zero_mask == + * 0xFFFFFFFF if the half-precision number was zero (+0.0h or -0.0h) + * 0x00000000 otherwise + */ + const int32_t zero_mask = (int32_t)(nonsign - 1) >> 31; + /* + * 1. Shift nonsign left by renorm_shift to normalize it (if the input + * was denormal) + * 2. Shift nonsign right by 4 so the exponent (4 bits originally) + * becomes an 8-bit field and 3-bit mantissa shifts into the 3 high + * bits of the 23-bit mantissa of IEEE single-precision number. + * 3. 
Add 0x78 to the exponent (starting at bit 23) to compensate for the
+   * difference in exponent bias (0x7F for the single-precision number less
+   * 0x07 for the fp8e4m3fn number).
+   * 4. Subtract renorm_shift from the exponent (starting at bit 23) to
+   * account for renormalization. As renorm_shift is less than 0x78, this
+   * can be combined with step 3.
+   * 5. Binary OR with inf_nan_mask to turn the exponent into 0xFF if the
+   * input was NaN or infinity.
+   * 6. Binary ANDNOT with zero_mask to turn the mantissa and exponent
+   * into zero if the input was zero.
+   * 7. Combine with the sign of the input number.
+   */
+  uint32_t result = sign |
+      ((((nonsign << renorm_shift >> 4) + ((0x78 - renorm_shift) << 23)) |
+        inf_nan_mask) &
+       ~zero_mask);
+  return fp32_from_bits(result);
+}
+
+/*
+ * Convert a 32-bit floating-point number in IEEE single-precision format to an
+ * 8-bit floating-point number in fp8 E4M3FN format, in bit representation.
+ */
+inline C10_HOST_DEVICE uint8_t fp8e4m3fn_from_fp32_value(float f) {
+  /*
+   * Binary representation of 480.0f, which is the first value
+   * not representable in fp8e4m3fn range:
+   * 0 1111 111 - fp8e4m3fn
+   * 0 10000111 11100000000000000000000 - fp32
+   */
+  constexpr uint32_t fp8_max = UINT32_C(1087) << 20;
+
+  /*
+   * A mask for converting fp32 numbers lower than fp8e4m3fn normal range
+   * into denorm representation
+   * magic number: ((127 - 7) + (23 - 3) + 1)
+   */
+  constexpr uint32_t denorm_mask = UINT32_C(141) << 23;
+
+  uint32_t f_bits = fp32_to_bits(f);
+
+  uint8_t result = 0u;
+
+  /*
+   * Extract the sign of the input number into the high bit of the 32-bit word:
+   *
+   *      +---+----------------------------------+
+   *      | S |0000000 00000000 00000000 00000000|
+   *      +---+----------------------------------+
+   * Bits  31                 0-31
+   */
+  const uint32_t sign = f_bits & UINT32_C(0x80000000);
+
+  /*
+   * Set sign bit to 0
+   */
+  f_bits ^= sign;
+
+  if (f_bits >= fp8_max) {
+    // NaN - all exponent and mantissa bits set to 1
+    result = 0x7f;
+  } else {
+    if (f_bits < (UINT32_C(121) << 23)) {
+      // Input number is smaller than 2^(-6), which is the smallest
+      // fp8e4m3fn normal number
+      f_bits =
+          fp32_to_bits(fp32_from_bits(f_bits) + fp32_from_bits(denorm_mask));
+      result = static_cast<uint8_t>(f_bits - denorm_mask);
+    } else {
+      // resulting mantissa is odd
+      uint8_t mant_odd = (f_bits >> 20) & 1;
+
+      // update exponent, rounding bias part 1
+      f_bits += ((uint32_t)(7 - 127) << 23) + 0x7FFFF;
+
+      // rounding bias part 2
+      f_bits += mant_odd;
+
+      // take the bits!
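+      // The two additions above implement round-to-nearest-even: 0x7FFFF is
+      // one less than half of the 20 discarded mantissa bits (0x80000), and
+      // adding mant_odd breaks exact ties toward the even representable
+      // value.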
+ result = static_cast(f_bits >> 20); + } + } + + result |= static_cast(sign >> 24); + return result; +} + +} // namespace detail + +struct alignas(1) Float8_e4m3fn { + uint8_t x; + + struct from_bits_t {}; + C10_HOST_DEVICE static constexpr from_bits_t from_bits() { + return from_bits_t(); + } + + Float8_e4m3fn() = default; + + constexpr C10_HOST_DEVICE Float8_e4m3fn(uint8_t bits, from_bits_t) + : x(bits) {} + inline C10_HOST_DEVICE Float8_e4m3fn(float value); + inline C10_HOST_DEVICE operator float() const; + inline C10_HOST_DEVICE bool isnan() const; +}; + +C10_API inline std::ostream& operator<<( + std::ostream& out, + const Float8_e4m3fn& value) { + out << (float)value; + return out; +} + +} // namespace c10 + +#include // IWYU pragma: keep diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e4m3fnuz-inl.h b/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e4m3fnuz-inl.h new file mode 100644 index 0000000000000000000000000000000000000000..4c54c3b7d7d04b222a6498f15cb97d076d1bf890 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e4m3fnuz-inl.h @@ -0,0 +1,279 @@ +#pragma once + +#include +#include +#include +#include + +C10_CLANG_DIAGNOSTIC_PUSH() +#if C10_CLANG_HAS_WARNING("-Wimplicit-int-float-conversion") +C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion") +#endif + +namespace c10 { + +/// Constructors + +inline C10_HOST_DEVICE Float8_e4m3fnuz::Float8_e4m3fnuz(float value) + : x(detail::fp8e4m3fnuz_from_fp32_value(value)) {} + +/// Implicit conversions + +inline C10_HOST_DEVICE Float8_e4m3fnuz::operator float() const { + return detail::fp8_fnuz_to_fp32_value<4, 3>(x); +} + +/// Special values helper + +inline C10_HOST_DEVICE bool Float8_e4m3fnuz::isnan() const { + return x == 0b10000000; +} + +/// Arithmetic + +inline C10_HOST_DEVICE Float8_e4m3fnuz +operator+(const Float8_e4m3fnuz& a, const Float8_e4m3fnuz& b) { + return static_cast(a) + static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e4m3fnuz +operator-(const Float8_e4m3fnuz& a, const Float8_e4m3fnuz& b) { + return static_cast(a) - static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e4m3fnuz +operator*(const Float8_e4m3fnuz& a, const Float8_e4m3fnuz& b) { + return static_cast(a) * static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e4m3fnuz operator/( + const Float8_e4m3fnuz& a, + const Float8_e4m3fnuz& b) __ubsan_ignore_float_divide_by_zero__ { + return static_cast(a) / static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e4m3fnuz operator-(const Float8_e4m3fnuz& a) { + return -static_cast(a); +} + +inline C10_HOST_DEVICE Float8_e4m3fnuz& operator+=( + Float8_e4m3fnuz& a, + const Float8_e4m3fnuz& b) { + a = a + b; + return a; +} + +inline C10_HOST_DEVICE Float8_e4m3fnuz& operator-=( + Float8_e4m3fnuz& a, + const Float8_e4m3fnuz& b) { + a = a - b; + return a; +} + +inline C10_HOST_DEVICE Float8_e4m3fnuz& operator*=( + Float8_e4m3fnuz& a, + const Float8_e4m3fnuz& b) { + a = a * b; + return a; +} + +inline C10_HOST_DEVICE Float8_e4m3fnuz& operator/=( + Float8_e4m3fnuz& a, + const Float8_e4m3fnuz& b) { + a = a / b; + return a; +} + +/// Arithmetic with floats + +inline C10_HOST_DEVICE float operator+(Float8_e4m3fnuz a, float b) { + return static_cast(a) + b; +} +inline C10_HOST_DEVICE float operator-(Float8_e4m3fnuz a, float b) { + return static_cast(a) - b; +} +inline C10_HOST_DEVICE float operator*(Float8_e4m3fnuz a, float b) { + return static_cast(a) * b; +} +inline C10_HOST_DEVICE float operator/(Float8_e4m3fnuz a, float b) + __ubsan_ignore_float_divide_by_zero__ { + 
return static_cast(a) / b; +} + +inline C10_HOST_DEVICE float operator+(float a, Float8_e4m3fnuz b) { + return a + static_cast(b); +} +inline C10_HOST_DEVICE float operator-(float a, Float8_e4m3fnuz b) { + return a - static_cast(b); +} +inline C10_HOST_DEVICE float operator*(float a, Float8_e4m3fnuz b) { + return a * static_cast(b); +} +inline C10_HOST_DEVICE float operator/(float a, Float8_e4m3fnuz b) + __ubsan_ignore_float_divide_by_zero__ { + return a / static_cast(b); +} + +inline C10_HOST_DEVICE float& operator+=(float& a, const Float8_e4m3fnuz& b) { + return a += static_cast(b); +} +inline C10_HOST_DEVICE float& operator-=(float& a, const Float8_e4m3fnuz& b) { + return a -= static_cast(b); +} +inline C10_HOST_DEVICE float& operator*=(float& a, const Float8_e4m3fnuz& b) { + return a *= static_cast(b); +} +inline C10_HOST_DEVICE float& operator/=(float& a, const Float8_e4m3fnuz& b) { + return a /= static_cast(b); +} + +/// Arithmetic with doubles + +inline C10_HOST_DEVICE double operator+(Float8_e4m3fnuz a, double b) { + return static_cast(a) + b; +} +inline C10_HOST_DEVICE double operator-(Float8_e4m3fnuz a, double b) { + return static_cast(a) - b; +} +inline C10_HOST_DEVICE double operator*(Float8_e4m3fnuz a, double b) { + return static_cast(a) * b; +} +inline C10_HOST_DEVICE double operator/(Float8_e4m3fnuz a, double b) + __ubsan_ignore_float_divide_by_zero__ { + return static_cast(a) / b; +} + +inline C10_HOST_DEVICE double operator+(double a, Float8_e4m3fnuz b) { + return a + static_cast(b); +} +inline C10_HOST_DEVICE double operator-(double a, Float8_e4m3fnuz b) { + return a - static_cast(b); +} +inline C10_HOST_DEVICE double operator*(double a, Float8_e4m3fnuz b) { + return a * static_cast(b); +} +inline C10_HOST_DEVICE double operator/(double a, Float8_e4m3fnuz b) + __ubsan_ignore_float_divide_by_zero__ { + return a / static_cast(b); +} + +/// Arithmetic with ints + +inline C10_HOST_DEVICE Float8_e4m3fnuz operator+(Float8_e4m3fnuz a, int b) { + return a + static_cast(b); +} +inline C10_HOST_DEVICE Float8_e4m3fnuz operator-(Float8_e4m3fnuz a, int b) { + return a - static_cast(b); +} +inline C10_HOST_DEVICE Float8_e4m3fnuz operator*(Float8_e4m3fnuz a, int b) { + return a * static_cast(b); +} +inline C10_HOST_DEVICE Float8_e4m3fnuz operator/(Float8_e4m3fnuz a, int b) { + return a / static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e4m3fnuz operator+(int a, Float8_e4m3fnuz b) { + return static_cast(a) + b; +} +inline C10_HOST_DEVICE Float8_e4m3fnuz operator-(int a, Float8_e4m3fnuz b) { + return static_cast(a) - b; +} +inline C10_HOST_DEVICE Float8_e4m3fnuz operator*(int a, Float8_e4m3fnuz b) { + return static_cast(a) * b; +} +inline C10_HOST_DEVICE Float8_e4m3fnuz operator/(int a, Float8_e4m3fnuz b) { + return static_cast(a) / b; +} + +//// Arithmetic with int64_t + +inline C10_HOST_DEVICE Float8_e4m3fnuz operator+(Float8_e4m3fnuz a, int64_t b) { + return a + static_cast(b); +} +inline C10_HOST_DEVICE Float8_e4m3fnuz operator-(Float8_e4m3fnuz a, int64_t b) { + return a - static_cast(b); +} +inline C10_HOST_DEVICE Float8_e4m3fnuz operator*(Float8_e4m3fnuz a, int64_t b) { + return a * static_cast(b); +} +inline C10_HOST_DEVICE Float8_e4m3fnuz operator/(Float8_e4m3fnuz a, int64_t b) { + return a / static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e4m3fnuz operator+(int64_t a, Float8_e4m3fnuz b) { + return static_cast(a) + b; +} +inline C10_HOST_DEVICE Float8_e4m3fnuz operator-(int64_t a, Float8_e4m3fnuz b) { + return static_cast(a) - b; +} +inline C10_HOST_DEVICE Float8_e4m3fnuz 
operator*(int64_t a, Float8_e4m3fnuz b) { + return static_cast(a) * b; +} +inline C10_HOST_DEVICE Float8_e4m3fnuz operator/(int64_t a, Float8_e4m3fnuz b) { + return static_cast(a) / b; +} + +/// NOTE: we do not define comparisons directly and instead rely on the implicit +/// conversion from c10::Float8_e4m3fnuz to float. + +} // namespace c10 + +namespace std { + +template <> +class numeric_limits { + public: + static constexpr bool is_specialized = true; + static constexpr bool is_signed = true; + static constexpr bool is_integer = false; + static constexpr bool is_exact = false; + static constexpr bool has_infinity = false; + static constexpr bool has_quiet_NaN = true; + static constexpr bool has_signaling_NaN = false; + static constexpr auto has_denorm = true; + static constexpr auto has_denorm_loss = true; + static constexpr auto round_style = numeric_limits::round_style; + static constexpr bool is_iec559 = false; + static constexpr bool is_bounded = true; + static constexpr bool is_modulo = false; + static constexpr int digits = 4; + static constexpr int digits10 = 0; + static constexpr int max_digits10 = 3; + static constexpr int radix = 2; + static constexpr int min_exponent = -6; + static constexpr int min_exponent10 = -1; + static constexpr int max_exponent = 8; + static constexpr int max_exponent10 = 2; + static constexpr auto traps = numeric_limits::traps; + static constexpr auto tinyness_before = false; + + static constexpr c10::Float8_e4m3fnuz min() { + return c10::Float8_e4m3fnuz(0x08, c10::Float8_e4m3fnuz::from_bits()); + } + static constexpr c10::Float8_e4m3fnuz lowest() { + return c10::Float8_e4m3fnuz(0xFF, c10::Float8_e4m3fnuz::from_bits()); + } + static constexpr c10::Float8_e4m3fnuz max() { + return c10::Float8_e4m3fnuz(0x7F, c10::Float8_e4m3fnuz::from_bits()); + } + static constexpr c10::Float8_e4m3fnuz epsilon() { + return c10::Float8_e4m3fnuz(0x28, c10::Float8_e4m3fnuz::from_bits()); + } + static constexpr c10::Float8_e4m3fnuz round_error() { + return c10::Float8_e4m3fnuz(0x38, c10::Float8_e4m3fnuz::from_bits()); + } + static constexpr c10::Float8_e4m3fnuz infinity() { + // NaN (no infinities) + return c10::Float8_e4m3fnuz(0x80, c10::Float8_e4m3fnuz::from_bits()); + } + static constexpr c10::Float8_e4m3fnuz quiet_NaN() { + return c10::Float8_e4m3fnuz(0x80, c10::Float8_e4m3fnuz::from_bits()); + } + static constexpr c10::Float8_e4m3fnuz denorm_min() { + return c10::Float8_e4m3fnuz(0x01, c10::Float8_e4m3fnuz::from_bits()); + } +}; + +} // namespace std + +C10_CLANG_DIAGNOSTIC_POP() diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e4m3fnuz.h b/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e4m3fnuz.h new file mode 100644 index 0000000000000000000000000000000000000000..2ee954b80b58ec0179dd69fafa07966e028e56a0 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e4m3fnuz.h @@ -0,0 +1,139 @@ +#pragma once + +/// Defines the Float8_e4m3fnuz type (8-bit floating-point) including +/// conversions to standard C types and basic arithmetic operations. Note that +/// arithmetic operations are implemented by converting to floating point and +/// performing the operation in float32. 
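+/// For example, `Float8_e4m3fnuz(1.5f) + Float8_e4m3fnuz(1.5f)` converts both
+/// operands to float, adds them in float32 and converts the 3.0f result back
+/// to fp8, while a mixed expression such as `Float8_e4m3fnuz(1.5f) + 0.25f`
+/// returns a plain float (see the operator overloads in Float8_e4m3fnuz-inl.h).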
+/// Binary configuration remains the same as Float8_e4m3fn: +/// s eeee mmm +/// 1 sign bit +/// 4 exponent bits +/// 3 mantissa bits +/// The key differences versus Float8_e4m3fn are: +/// bias = 8 +/// no infinities or negative zero +/// NaN only when sign bit is 1, rest all 0s +/// +/// Implementation based on the paper https://arxiv.org/pdf/2206.02915.pdf and +/// the existing Float8_e4m3fn implementation. + +#include +#include +#include +#include + +#if defined(__cplusplus) +#include +#elif !defined(__OPENCL_VERSION__) +#include +#include +#endif + +#include +#include + +namespace c10 { + +namespace detail { + +/* + * Convert a 32-bit floating-point number in IEEE single-precision format to a + * 8-bit floating-point number in fp8 E4M3FNUZ format, in bit representation. + */ +inline C10_HOST_DEVICE uint8_t fp8e4m3fnuz_from_fp32_value(float f) { + /* + * Binary representation of 256.0f, which is the first value not representable + * (i.e. the first value which would overflow in to the sign bit, resulting in + * a NaN) in fp8e4m3fnuz range: + * 1 0000 000 - fp8e4m3fnuz + * 0 10000111 00000000000000000000000 - fp32 + */ + constexpr uint32_t fnuz_max = UINT32_C(0x87) << 23; + + /* + * A mask for converting fp32 numbers lower than fp8e4m3fnuz normal range + * into denorm representation + * magic number: ((127 - 8) + (23 - 3) + 1) + */ + constexpr uint32_t denorm_mask = UINT32_C(0x8C) << 23; + + uint32_t f_bits = fp32_to_bits(f); + + uint32_t result = 0u; + + /* + * Extract the sign of the input number into the high bit of the 32-bit word: + * + * +---+----------------------------------+ + * | S |0000000 00000000 00000000 00000000| + * +---+----------------------------------+ + * Bits 31 0-31 + */ + const uint32_t sign = f_bits & UINT32_C(0x80000000); + + /* + * Set sign bit to 0 + */ + f_bits ^= sign; + + if (f_bits >= fnuz_max) { + // NaN -- sign bit set to 1, rest 0s. + return 0x80; + } + + if (f_bits < (UINT32_C(0x78) << 23) /* 2^-7 in float32 */) { + // Input exponent is less than -7, the smallest e4m3fnuz exponent, so the + // number will become subnormal. + f_bits = fp32_to_bits(fp32_from_bits(f_bits) + fp32_from_bits(denorm_mask)); + result = static_cast(f_bits - denorm_mask); + if (result == 0) { + // fnuz types don't have negative zero. + return 0; + } + } else { + // resulting mantissa is odd + uint8_t mant_odd = (f_bits >> 20) & 1; + + // update exponent, rounding bias part 1 + f_bits += ((uint32_t)(8 - 127) << 23) + 0x7FFFF; + + // rounding bias part 2 + f_bits += mant_odd; + + // take the bits! 
+ result = static_cast(f_bits >> 20); + } + + result |= sign >> 24; + return result; +} + +} // namespace detail + +struct alignas(1) Float8_e4m3fnuz { + uint8_t x; + + struct from_bits_t {}; + C10_HOST_DEVICE static constexpr from_bits_t from_bits() { + return from_bits_t(); + } + + Float8_e4m3fnuz() = default; + + constexpr C10_HOST_DEVICE Float8_e4m3fnuz(uint8_t bits, from_bits_t) + : x(bits) {} + inline C10_HOST_DEVICE Float8_e4m3fnuz(float value); + inline C10_HOST_DEVICE operator float() const; + inline C10_HOST_DEVICE bool isnan() const; +}; + +C10_API inline std::ostream& operator<<( + std::ostream& out, + const Float8_e4m3fnuz& value) { + out << (float)value; + return out; +} + +} // namespace c10 + +#include // IWYU pragma: keep diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e5m2-inl.h b/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e5m2-inl.h new file mode 100644 index 0000000000000000000000000000000000000000..925f268a582ab9c341968be6667bebed68e3388d --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e5m2-inl.h @@ -0,0 +1,286 @@ +#pragma once + +#include +#include +#include + +C10_CLANG_DIAGNOSTIC_PUSH() +#if C10_CLANG_HAS_WARNING("-Wimplicit-int-float-conversion") +C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion") +#endif + +#define EXP_WIDTH_FP8 5 +#define MAN_WIDTH_FP8 2 +#define EXP_BIAS_FP8 15 + +namespace c10 { + +/// Constructors + +inline C10_HOST_DEVICE Float8_e5m2::Float8_e5m2(float value) + : x(detail::fp8e5m2_from_fp32_value(value)) {} + +/// Implicit conversions + +inline C10_HOST_DEVICE Float8_e5m2::operator float() const { + return detail::fp8e5m2_to_fp32_value(x); +} + +/// Special values helpers + +inline C10_HOST_DEVICE bool Float8_e5m2::isnan() const { + return (x & 0b01111111) > 0b01111100; +} + +inline C10_HOST_DEVICE bool Float8_e5m2::isinf() const { + return (x & 0b01111111) == 0b01111100; +} + +/// Arithmetic + +inline C10_HOST_DEVICE Float8_e5m2 +operator+(const Float8_e5m2& a, const Float8_e5m2& b) { + return static_cast(a) + static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e5m2 +operator-(const Float8_e5m2& a, const Float8_e5m2& b) { + return static_cast(a) - static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e5m2 +operator*(const Float8_e5m2& a, const Float8_e5m2& b) { + return static_cast(a) * static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e5m2 operator/( + const Float8_e5m2& a, + const Float8_e5m2& b) __ubsan_ignore_float_divide_by_zero__ { + return static_cast(a) / static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e5m2 operator-(const Float8_e5m2& a) { + return -static_cast(a); +} + +inline C10_HOST_DEVICE Float8_e5m2& operator+=( + Float8_e5m2& a, + const Float8_e5m2& b) { + a = a + b; + return a; +} + +inline C10_HOST_DEVICE Float8_e5m2& operator-=( + Float8_e5m2& a, + const Float8_e5m2& b) { + a = a - b; + return a; +} + +inline C10_HOST_DEVICE Float8_e5m2& operator*=( + Float8_e5m2& a, + const Float8_e5m2& b) { + a = a * b; + return a; +} + +inline C10_HOST_DEVICE Float8_e5m2& operator/=( + Float8_e5m2& a, + const Float8_e5m2& b) { + a = a / b; + return a; +} + +/// Arithmetic with floats + +inline C10_HOST_DEVICE float operator+(Float8_e5m2 a, float b) { + return static_cast(a) + b; +} +inline C10_HOST_DEVICE float operator-(Float8_e5m2 a, float b) { + return static_cast(a) - b; +} +inline C10_HOST_DEVICE float operator*(Float8_e5m2 a, float b) { + return static_cast(a) * b; +} +inline C10_HOST_DEVICE float operator/(Float8_e5m2 a, float b) + 
__ubsan_ignore_float_divide_by_zero__ { + return static_cast(a) / b; +} + +inline C10_HOST_DEVICE float operator+(float a, Float8_e5m2 b) { + return a + static_cast(b); +} +inline C10_HOST_DEVICE float operator-(float a, Float8_e5m2 b) { + return a - static_cast(b); +} +inline C10_HOST_DEVICE float operator*(float a, Float8_e5m2 b) { + return a * static_cast(b); +} +inline C10_HOST_DEVICE float operator/(float a, Float8_e5m2 b) + __ubsan_ignore_float_divide_by_zero__ { + return a / static_cast(b); +} + +inline C10_HOST_DEVICE float& operator+=(float& a, const Float8_e5m2& b) { + return a += static_cast(b); +} +inline C10_HOST_DEVICE float& operator-=(float& a, const Float8_e5m2& b) { + return a -= static_cast(b); +} +inline C10_HOST_DEVICE float& operator*=(float& a, const Float8_e5m2& b) { + return a *= static_cast(b); +} +inline C10_HOST_DEVICE float& operator/=(float& a, const Float8_e5m2& b) { + return a /= static_cast(b); +} + +/// Arithmetic with doubles + +inline C10_HOST_DEVICE double operator+(Float8_e5m2 a, double b) { + return static_cast(a) + b; +} +inline C10_HOST_DEVICE double operator-(Float8_e5m2 a, double b) { + return static_cast(a) - b; +} +inline C10_HOST_DEVICE double operator*(Float8_e5m2 a, double b) { + return static_cast(a) * b; +} +inline C10_HOST_DEVICE double operator/(Float8_e5m2 a, double b) + __ubsan_ignore_float_divide_by_zero__ { + return static_cast(a) / b; +} + +inline C10_HOST_DEVICE double operator+(double a, Float8_e5m2 b) { + return a + static_cast(b); +} +inline C10_HOST_DEVICE double operator-(double a, Float8_e5m2 b) { + return a - static_cast(b); +} +inline C10_HOST_DEVICE double operator*(double a, Float8_e5m2 b) { + return a * static_cast(b); +} +inline C10_HOST_DEVICE double operator/(double a, Float8_e5m2 b) + __ubsan_ignore_float_divide_by_zero__ { + return a / static_cast(b); +} + +/// Arithmetic with ints + +inline C10_HOST_DEVICE Float8_e5m2 operator+(Float8_e5m2 a, int b) { + return a + static_cast(b); +} +inline C10_HOST_DEVICE Float8_e5m2 operator-(Float8_e5m2 a, int b) { + return a - static_cast(b); +} +inline C10_HOST_DEVICE Float8_e5m2 operator*(Float8_e5m2 a, int b) { + return a * static_cast(b); +} +inline C10_HOST_DEVICE Float8_e5m2 operator/(Float8_e5m2 a, int b) { + return a / static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e5m2 operator+(int a, Float8_e5m2 b) { + return static_cast(a) + b; +} +inline C10_HOST_DEVICE Float8_e5m2 operator-(int a, Float8_e5m2 b) { + return static_cast(a) - b; +} +inline C10_HOST_DEVICE Float8_e5m2 operator*(int a, Float8_e5m2 b) { + return static_cast(a) * b; +} +inline C10_HOST_DEVICE Float8_e5m2 operator/(int a, Float8_e5m2 b) { + return static_cast(a) / b; +} + +//// Arithmetic with int64_t + +inline C10_HOST_DEVICE Float8_e5m2 operator+(Float8_e5m2 a, int64_t b) { + return a + static_cast(b); +} +inline C10_HOST_DEVICE Float8_e5m2 operator-(Float8_e5m2 a, int64_t b) { + return a - static_cast(b); +} +inline C10_HOST_DEVICE Float8_e5m2 operator*(Float8_e5m2 a, int64_t b) { + return a * static_cast(b); +} +inline C10_HOST_DEVICE Float8_e5m2 operator/(Float8_e5m2 a, int64_t b) { + return a / static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e5m2 operator+(int64_t a, Float8_e5m2 b) { + return static_cast(a) + b; +} +inline C10_HOST_DEVICE Float8_e5m2 operator-(int64_t a, Float8_e5m2 b) { + return static_cast(a) - b; +} +inline C10_HOST_DEVICE Float8_e5m2 operator*(int64_t a, Float8_e5m2 b) { + return static_cast(a) * b; +} +inline C10_HOST_DEVICE Float8_e5m2 operator/(int64_t a, Float8_e5m2 
b) { + return static_cast(a) / b; +} + +/// NOTE: we do not define comparisons directly and instead rely on the implicit +/// conversion from c10::Float8_e5m2 to float. + +} // namespace c10 + +namespace std { + +template <> +class numeric_limits { + public: + static constexpr bool is_signed = true; + static constexpr bool is_integer = false; + static constexpr bool is_specialized = true; + static constexpr bool is_exact = false; + static constexpr bool has_infinity = true; + static constexpr bool has_quiet_NaN = true; + static constexpr bool has_signaling_NaN = false; + static constexpr auto has_denorm = true; + static constexpr auto has_denorm_loss = true; + static constexpr auto round_style = numeric_limits::round_style; + static constexpr bool is_iec559 = false; + static constexpr bool is_bounded = true; + static constexpr bool is_modulo = false; + static constexpr int digits = 3; + static constexpr int digits10 = 0; + static constexpr int max_digits10 = 2; + static constexpr int radix = 2; + static constexpr int min_exponent = -13; + static constexpr int min_exponent10 = -4; + static constexpr int max_exponent = 16; + static constexpr int max_exponent10 = 4; + static constexpr auto traps = numeric_limits::traps; + static constexpr auto tinyness_before = + numeric_limits::tinyness_before; + + static constexpr c10::Float8_e5m2 min() { + return c10::Float8_e5m2(0x4, c10::Float8_e5m2::from_bits()); + } + static constexpr c10::Float8_e5m2 max() { + return c10::Float8_e5m2(0x7B, c10::Float8_e5m2::from_bits()); + } + static constexpr c10::Float8_e5m2 lowest() { + return c10::Float8_e5m2(0xFB, c10::Float8_e5m2::from_bits()); + } + static constexpr c10::Float8_e5m2 epsilon() { + return c10::Float8_e5m2(0x34, c10::Float8_e5m2::from_bits()); + } + static constexpr c10::Float8_e5m2 round_error() { + return c10::Float8_e5m2(0x38, c10::Float8_e5m2::from_bits()); + } + static constexpr c10::Float8_e5m2 infinity() { + return c10::Float8_e5m2(0x7C, c10::Float8_e5m2::from_bits()); + } + static constexpr c10::Float8_e5m2 quiet_NaN() { + return c10::Float8_e5m2(0x7F, c10::Float8_e5m2::from_bits()); + } + static constexpr c10::Float8_e5m2 denorm_min() { + return c10::Float8_e5m2(0x01, c10::Float8_e5m2::from_bits()); + } +}; + +} // namespace std + +C10_CLANG_DIAGNOSTIC_POP() diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e5m2.h b/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e5m2.h new file mode 100644 index 0000000000000000000000000000000000000000..64e6d8fb8dbb3217ab138e016000be1376c23dc1 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e5m2.h @@ -0,0 +1,148 @@ +#pragma once + +/// Defines the Float8_e5m2 type (8-bit floating-point) including conversions +/// to standard C types and basic arithmetic operations. Note that arithmetic +/// operations are implemented by converting to floating point and +/// performing the operation in float32. +/// Binary configuration: +/// s eeeee mm +/// 1 sign bit +/// 5 exponent bits +/// 2 mantissa bits +/// bias = 15 +/// +/// Implementation based on the paper https://arxiv.org/pdf/2209.05433.pdf +/// and inspired by Half implementation from pytorch/c10/util/Half.h + +#include + +namespace c10 { + +namespace detail { + +/* + * Convert a 8-bit floating-point number in fp8 E5M2 format, in bit + * representation, to a 32-bit floating-point number in IEEE single-precision + * format, in bit representation. + * + * @note The implementation doesn't use any floating-point operations. 
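+ *
+ * @note Worked example: because E5M2 shares the exponent width and bias (15)
+ * of IEEE half precision, shifting the 8 input bits left by 8 produces a
+ * valid fp16 bit pattern with the same value. For instance, the input 0x44
+ * (0 10001 00, i.e. 2^2 = 4.0) becomes the fp16 pattern 0x4400, which
+ * fp16_ieee_to_fp32_value expands to 4.0f.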
+ */ +inline C10_HOST_DEVICE float fp8e5m2_to_fp32_value(uint8_t input) { + /* + * Extend the fp8 E5M2 number to 32 bits and shift to the + * upper part of the 32-bit word: + * +---+----+---+-----------------------------+ + * | S |EEEEE|MM|0000 0000 0000 0000 0000 0000| + * +---+----+---+-----------------------------+ + * Bits 31 26-30 24-25 0-23 + * + * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 + * - zero bits. + */ + uint16_t half_representation = input; + half_representation <<= 8; + return fp16_ieee_to_fp32_value(half_representation); +} + +/* + * Convert a 32-bit floating-point number in IEEE single-precision format to a + * 8-bit floating-point number in fp8 E5M2 format, in bit representation. + */ +inline C10_HOST_DEVICE uint8_t fp8e5m2_from_fp32_value(float f) { + /* + * Binary representation of fp32 infinity + * 0 11111111 00000000000000000000000 + */ + constexpr uint32_t fp32_inf = UINT32_C(255) << 23; + + /* + * Binary representation of 65536.0f, which is the first value + * not representable in fp8e5m2 range: + * 0 11111 00 - fp8e5m2 + * 0 10001111 00000000000000000000000 - fp32 + */ + constexpr uint32_t fp8_max = UINT32_C(143) << 23; + + /* + * A mask for converting fp32 numbers lower than fp8e5m2 normal range + * into denorm representation + * magic number: ((127 - 15) + (23 - 2) + 1) + */ + constexpr uint32_t denorm_mask = UINT32_C(134) << 23; + + uint32_t f_bits = fp32_to_bits(f); + uint8_t result = 0u; + + /* + * Extract the sign of the input number into the high bit of the 32-bit word: + * + * +---+----------------------------------+ + * | S |0000000 00000000 00000000 00000000| + * +---+----------------------------------+ + * Bits 31 0-31 + */ + const uint32_t sign = f_bits & UINT32_C(0x80000000); + + /* + * Set sign bit to 0 + */ + f_bits ^= sign; + + if (f_bits >= fp8_max) { + // NaN - all exponent and mantissa bits set to 1 + result = f_bits > fp32_inf ? UINT8_C(0x7F) : UINT8_C(0x7C); + } else { + if (f_bits < (UINT32_C(113) << 23)) { + // Input number is smaller than 2^(-14), which is the smallest + // fp8e5m2 normal number + f_bits = + fp32_to_bits(fp32_from_bits(f_bits) + fp32_from_bits(denorm_mask)); + result = static_cast(f_bits - denorm_mask); + } else { + // resulting mantissa is odd + uint32_t mant_odd = (f_bits >> 21) & 1; + + // update exponent, rounding bias part 1 + f_bits += ((uint32_t)(15 - 127) << 23) + 0xFFFFF; + + // rounding bias part 2 + f_bits += mant_odd; + + // take the bits! 
+ result = static_cast(f_bits >> 21); + } + } + + result |= static_cast(sign >> 24); + return result; +} + +} // namespace detail + +struct alignas(1) Float8_e5m2 { + uint8_t x; + + struct from_bits_t {}; + C10_HOST_DEVICE static constexpr from_bits_t from_bits() { + return from_bits_t(); + } + + Float8_e5m2() = default; + + constexpr C10_HOST_DEVICE Float8_e5m2(uint8_t bits, from_bits_t) : x(bits) {} + inline C10_HOST_DEVICE Float8_e5m2(float value); + inline C10_HOST_DEVICE operator float() const; + inline C10_HOST_DEVICE bool isnan() const; + inline C10_HOST_DEVICE bool isinf() const; +}; + +C10_API inline std::ostream& operator<<( + std::ostream& out, + const Float8_e5m2& value) { + out << (float)value; + return out; +} + +} // namespace c10 + +#include // IWYU pragma: keep diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e5m2fnuz-inl.h b/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e5m2fnuz-inl.h new file mode 100644 index 0000000000000000000000000000000000000000..f546cd1cf9cd3009ebdc6d0253dbc48e924a984b --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e5m2fnuz-inl.h @@ -0,0 +1,285 @@ +#pragma once + +#include +#include +#include +#include + +C10_CLANG_DIAGNOSTIC_PUSH() +#if C10_CLANG_HAS_WARNING("-Wimplicit-int-float-conversion") +C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion") +#endif + +namespace c10 { + +/// Constructors + +inline C10_HOST_DEVICE Float8_e5m2fnuz::Float8_e5m2fnuz(float value) + : x(detail::fp8e5m2fnuz_from_fp32_value(value)) {} + +/// Implicit conversions + +inline C10_HOST_DEVICE Float8_e5m2fnuz::operator float() const { + return detail::fp8_fnuz_to_fp32_value<5, 2>(x); +} + +/// Special values helpers + +inline C10_HOST_DEVICE bool Float8_e5m2fnuz::isnan() const { + return x == 0b10000000; +} + +inline C10_HOST_DEVICE bool Float8_e5m2fnuz::isinf() const { + return false; +} + +/// Arithmetic + +inline C10_HOST_DEVICE Float8_e5m2fnuz +operator+(const Float8_e5m2fnuz& a, const Float8_e5m2fnuz& b) { + return static_cast(a) + static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e5m2fnuz +operator-(const Float8_e5m2fnuz& a, const Float8_e5m2fnuz& b) { + return static_cast(a) - static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e5m2fnuz +operator*(const Float8_e5m2fnuz& a, const Float8_e5m2fnuz& b) { + return static_cast(a) * static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e5m2fnuz operator/( + const Float8_e5m2fnuz& a, + const Float8_e5m2fnuz& b) __ubsan_ignore_float_divide_by_zero__ { + return static_cast(a) / static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e5m2fnuz operator-(const Float8_e5m2fnuz& a) { + return -static_cast(a); +} + +inline C10_HOST_DEVICE Float8_e5m2fnuz& operator+=( + Float8_e5m2fnuz& a, + const Float8_e5m2fnuz& b) { + a = a + b; + return a; +} + +inline C10_HOST_DEVICE Float8_e5m2fnuz& operator-=( + Float8_e5m2fnuz& a, + const Float8_e5m2fnuz& b) { + a = a - b; + return a; +} + +inline C10_HOST_DEVICE Float8_e5m2fnuz& operator*=( + Float8_e5m2fnuz& a, + const Float8_e5m2fnuz& b) { + a = a * b; + return a; +} + +inline C10_HOST_DEVICE Float8_e5m2fnuz& operator/=( + Float8_e5m2fnuz& a, + const Float8_e5m2fnuz& b) { + a = a / b; + return a; +} + +/// Arithmetic with floats + +inline C10_HOST_DEVICE float operator+(Float8_e5m2fnuz a, float b) { + return static_cast(a) + b; +} +inline C10_HOST_DEVICE float operator-(Float8_e5m2fnuz a, float b) { + return static_cast(a) - b; +} +inline C10_HOST_DEVICE float operator*(Float8_e5m2fnuz a, float b) { + return static_cast(a) * 
b; +} +inline C10_HOST_DEVICE float operator/(Float8_e5m2fnuz a, float b) + __ubsan_ignore_float_divide_by_zero__ { + return static_cast(a) / b; +} + +inline C10_HOST_DEVICE float operator+(float a, Float8_e5m2fnuz b) { + return a + static_cast(b); +} +inline C10_HOST_DEVICE float operator-(float a, Float8_e5m2fnuz b) { + return a - static_cast(b); +} +inline C10_HOST_DEVICE float operator*(float a, Float8_e5m2fnuz b) { + return a * static_cast(b); +} +inline C10_HOST_DEVICE float operator/(float a, Float8_e5m2fnuz b) + __ubsan_ignore_float_divide_by_zero__ { + return a / static_cast(b); +} + +inline C10_HOST_DEVICE float& operator+=(float& a, const Float8_e5m2fnuz& b) { + return a += static_cast(b); +} +inline C10_HOST_DEVICE float& operator-=(float& a, const Float8_e5m2fnuz& b) { + return a -= static_cast(b); +} +inline C10_HOST_DEVICE float& operator*=(float& a, const Float8_e5m2fnuz& b) { + return a *= static_cast(b); +} +inline C10_HOST_DEVICE float& operator/=(float& a, const Float8_e5m2fnuz& b) { + return a /= static_cast(b); +} + +/// Arithmetic with doubles + +inline C10_HOST_DEVICE double operator+(Float8_e5m2fnuz a, double b) { + return static_cast(a) + b; +} +inline C10_HOST_DEVICE double operator-(Float8_e5m2fnuz a, double b) { + return static_cast(a) - b; +} +inline C10_HOST_DEVICE double operator*(Float8_e5m2fnuz a, double b) { + return static_cast(a) * b; +} +inline C10_HOST_DEVICE double operator/(Float8_e5m2fnuz a, double b) + __ubsan_ignore_float_divide_by_zero__ { + return static_cast(a) / b; +} + +inline C10_HOST_DEVICE double operator+(double a, Float8_e5m2fnuz b) { + return a + static_cast(b); +} +inline C10_HOST_DEVICE double operator-(double a, Float8_e5m2fnuz b) { + return a - static_cast(b); +} +inline C10_HOST_DEVICE double operator*(double a, Float8_e5m2fnuz b) { + return a * static_cast(b); +} +inline C10_HOST_DEVICE double operator/(double a, Float8_e5m2fnuz b) + __ubsan_ignore_float_divide_by_zero__ { + return a / static_cast(b); +} + +/// Arithmetic with ints + +inline C10_HOST_DEVICE Float8_e5m2fnuz operator+(Float8_e5m2fnuz a, int b) { + return a + static_cast(b); +} +inline C10_HOST_DEVICE Float8_e5m2fnuz operator-(Float8_e5m2fnuz a, int b) { + return a - static_cast(b); +} +inline C10_HOST_DEVICE Float8_e5m2fnuz operator*(Float8_e5m2fnuz a, int b) { + return a * static_cast(b); +} +inline C10_HOST_DEVICE Float8_e5m2fnuz operator/(Float8_e5m2fnuz a, int b) { + return a / static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e5m2fnuz operator+(int a, Float8_e5m2fnuz b) { + return static_cast(a) + b; +} +inline C10_HOST_DEVICE Float8_e5m2fnuz operator-(int a, Float8_e5m2fnuz b) { + return static_cast(a) - b; +} +inline C10_HOST_DEVICE Float8_e5m2fnuz operator*(int a, Float8_e5m2fnuz b) { + return static_cast(a) * b; +} +inline C10_HOST_DEVICE Float8_e5m2fnuz operator/(int a, Float8_e5m2fnuz b) { + return static_cast(a) / b; +} + +//// Arithmetic with int64_t + +inline C10_HOST_DEVICE Float8_e5m2fnuz operator+(Float8_e5m2fnuz a, int64_t b) { + return a + static_cast(b); +} +inline C10_HOST_DEVICE Float8_e5m2fnuz operator-(Float8_e5m2fnuz a, int64_t b) { + return a - static_cast(b); +} +inline C10_HOST_DEVICE Float8_e5m2fnuz operator*(Float8_e5m2fnuz a, int64_t b) { + return a * static_cast(b); +} +inline C10_HOST_DEVICE Float8_e5m2fnuz operator/(Float8_e5m2fnuz a, int64_t b) { + return a / static_cast(b); +} + +inline C10_HOST_DEVICE Float8_e5m2fnuz operator+(int64_t a, Float8_e5m2fnuz b) { + return static_cast(a) + b; +} +inline C10_HOST_DEVICE 
Float8_e5m2fnuz operator-(int64_t a, Float8_e5m2fnuz b) { + return static_cast(a) - b; +} +inline C10_HOST_DEVICE Float8_e5m2fnuz operator*(int64_t a, Float8_e5m2fnuz b) { + return static_cast(a) * b; +} +inline C10_HOST_DEVICE Float8_e5m2fnuz operator/(int64_t a, Float8_e5m2fnuz b) { + return static_cast(a) / b; +} + +/// NOTE: we do not define comparisons directly and instead rely on the implicit +/// conversion from c10::Float8_e5m2fnuz to float. + +} // namespace c10 + +namespace std { + +template <> +class numeric_limits { + public: + static constexpr bool is_signed = true; + static constexpr bool is_integer = false; + static constexpr bool is_specialized = true; + static constexpr bool is_exact = false; + static constexpr bool has_infinity = false; + static constexpr bool has_quiet_NaN = true; + static constexpr bool has_signaling_NaN = false; + static constexpr auto has_denorm = true; + static constexpr auto has_denorm_loss = true; + static constexpr auto round_style = numeric_limits::round_style; + static constexpr bool is_iec559 = false; + static constexpr bool is_bounded = true; + static constexpr bool is_modulo = false; + static constexpr int digits = 3; + static constexpr int digits10 = 0; + static constexpr int max_digits10 = 2; + static constexpr int radix = 2; + static constexpr int min_exponent = -14; + static constexpr int min_exponent10 = -4; + static constexpr int max_exponent = 16; + static constexpr int max_exponent10 = 4; + static constexpr auto traps = numeric_limits::traps; + static constexpr auto tinyness_before = + numeric_limits::tinyness_before; + + static constexpr c10::Float8_e5m2fnuz min() { + return c10::Float8_e5m2fnuz(0x04, c10::Float8_e5m2fnuz::from_bits()); + } + static constexpr c10::Float8_e5m2fnuz max() { + return c10::Float8_e5m2fnuz(0x7F, c10::Float8_e5m2fnuz::from_bits()); + } + static constexpr c10::Float8_e5m2fnuz lowest() { + return c10::Float8_e5m2fnuz(0xFF, c10::Float8_e5m2fnuz::from_bits()); + } + static constexpr c10::Float8_e5m2fnuz epsilon() { + return c10::Float8_e5m2fnuz(0x34, c10::Float8_e5m2fnuz::from_bits()); + } + static constexpr c10::Float8_e5m2fnuz round_error() { + return c10::Float8_e5m2fnuz(0x38, c10::Float8_e5m2fnuz::from_bits()); + } + static constexpr c10::Float8_e5m2fnuz infinity() { + return c10::Float8_e5m2fnuz(0x80, c10::Float8_e5m2fnuz::from_bits()); + } + // TODO(future): we are mapping neg_zero to both inf and NaN, this is + // surprising and we should figure out what to do about it. + static constexpr c10::Float8_e5m2fnuz quiet_NaN() { + return c10::Float8_e5m2fnuz(0x80, c10::Float8_e5m2fnuz::from_bits()); + } + static constexpr c10::Float8_e5m2fnuz denorm_min() { + return c10::Float8_e5m2fnuz(0x01, c10::Float8_e5m2fnuz::from_bits()); + } +}; + +} // namespace std + +C10_CLANG_DIAGNOSTIC_POP() diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e5m2fnuz.h b/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e5m2fnuz.h new file mode 100644 index 0000000000000000000000000000000000000000..b7f75d53d97f250bb87cde222e9e9b41b2452868 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e5m2fnuz.h @@ -0,0 +1,138 @@ +#pragma once + +/// Defines the Float8_e5m2fnuz type (8-bit floating-point) including +/// conversions to standard C types and basic arithmetic operations. Note that +/// arithmetic operations are implemented by converting to floating point and +/// performing the operation in float32. 
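+/// Because the format has no infinities (see below), float32 results above
+/// the e5m2fnuz maximum of 57344 convert back as NaN; for example,
+/// `Float8_e5m2fnuz(32768.0f) * Float8_e5m2fnuz(2.0f)` is computed as
+/// 65536.0f in float32 and comes back as NaN (0x80).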
+/// Binary configuration remains the same as e5m2: +/// s eeeee mm +/// 1 sign bit +/// 5 exponent bits +/// 2 mantissa bits +/// The key differences that e5m2fnuz brings are: +/// bias = 16 +/// no infinities or negative zero +/// NaN only when sign bit is 1, rest all 0s +/// +/// Implementation based on the paper https://arxiv.org/pdf/2206.02915.pdf and +/// the existing Float8_e4m3fn implementation. + +#include +#include +#include + +#if defined(__cplusplus) +#include +#elif !defined(__OPENCL_VERSION__) +#include +#include +#endif + +#include +#include + +namespace c10 { + +namespace detail { + +/* + * Convert a 32-bit floating-point number in IEEE single-precision format to a + * 8-bit floating-point number in fp8 E5M2 format, in bit representation. + */ +inline C10_HOST_DEVICE uint8_t fp8e5m2fnuz_from_fp32_value(float f) { + /* + * Binary representation of 65536.0f, which is the first value not + * representable (i.e. the first value which would overflow in to the sign + * bit, resulting in a NaN) in fp8e4m3fnuz range: + * 1 00000 00 - fp8e5m2fnuz + * 0 10001111 00000000000000000000000 - fp32 + */ + constexpr uint32_t fnuz_max = UINT32_C(0x8F) << 23; + + /* + * A mask for converting fp32 numbers lower than fp8e5m2fnuz normal range + * into denormalized representation. + * magic number: ((127 - 16) + (23 - 2) + 1) + */ + constexpr uint32_t denorm_mask = UINT32_C(0x85) << 23; + + uint32_t f_bits = fp32_to_bits(f); + uint32_t result = 0u; + + /* + * Extract the sign of the input number into the high bit of the 32-bit word: + * + * +---+----------------------------------+ + * | S |0000000 00000000 00000000 00000000| + * +---+----------------------------------+ + * Bits 31 0-31 + */ + const uint32_t sign = f_bits & UINT32_C(0x80000000); + + /* + * Set sign bit to 0 + */ + f_bits ^= sign; + + if (f_bits >= fnuz_max) { + // NaN -- sign bit set to 1, rest 0s + return 0x80; + } + + if (f_bits < (UINT32_C(0x70) << 23) /* 2^-15 in float32 */) { + // Input exponent is less than -15, the smallest e5m2fnuz exponent, so the + // number will become subnormal. + f_bits = fp32_to_bits(fp32_from_bits(f_bits) + fp32_from_bits(denorm_mask)); + result = static_cast(f_bits - denorm_mask); + if (result == 0) { + // fnuz types don't have negative zero. + return 0; + } + } else { + // resulting mantissa is odd + uint8_t mant_odd = (f_bits >> 21) & 1; + + // update exponent, rounding bias part 1 + f_bits += ((uint32_t)(16 - 127) << 23) + 0xFFFFF; + + // rounding bias part 2 + f_bits += mant_odd; + + // take the bits! 
+ result = static_cast(f_bits >> 21); + } + + result |= sign >> 24; + return result; +} + +} // namespace detail + +struct alignas(1) Float8_e5m2fnuz { + uint8_t x; + + struct from_bits_t {}; + C10_HOST_DEVICE static constexpr from_bits_t from_bits() { + return from_bits_t(); + } + + Float8_e5m2fnuz() = default; + + constexpr C10_HOST_DEVICE Float8_e5m2fnuz(uint8_t bits, from_bits_t) + : x(bits) {} + inline C10_HOST_DEVICE Float8_e5m2fnuz(float value); + inline C10_HOST_DEVICE operator float() const; + inline C10_HOST_DEVICE bool isnan() const; + inline C10_HOST_DEVICE bool isinf() const; +}; + +C10_API inline std::ostream& operator<<( + std::ostream& out, + const Float8_e5m2fnuz& value) { + out << (float)value; + return out; +} + +} // namespace c10 + +#include // IWYU pragma: keep diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e8m0fnu-inl.h b/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e8m0fnu-inl.h new file mode 100644 index 0000000000000000000000000000000000000000..5aa31e7b25ff8b2028ecc477831bddf5e8c4a219 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e8m0fnu-inl.h @@ -0,0 +1,112 @@ +#pragma once + +#include +#include +#include +#include + +// TODO(#146647): Can we remove the below warning? +C10_CLANG_DIAGNOSTIC_PUSH() +#if C10_CLANG_HAS_WARNING("-Wimplicit-int-float-conversion") +C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion") +#endif + +namespace c10 { + +/// Constructors + +inline C10_HOST_DEVICE Float8_e8m0fnu::Float8_e8m0fnu(float value) + : x(detail::fp8e8m0fnu_from_fp32_value(value)) {} + +/// Implicit conversions + +inline C10_HOST_DEVICE Float8_e8m0fnu::operator float() const { + // TODO(#146647): maybe rewrite without control flow + + // if exponent is zero, need to special case to return 2^-127 instead of zero + if (x == 0) { + return c10::detail::fp32_from_bits(0x00400000); + } + + // if exponent is NaN, need to special case to return properly encoded NaN + if (isnan()) { + return c10::detail::fp32_from_bits(0x7f800001); + } + + // leave sign at 0, set the exponent bits, leave stored mantissa at 0 + uint32_t res = x << 23; + + return c10::detail::fp32_from_bits(res); +} + +/// Special values helper + +inline C10_HOST_DEVICE bool Float8_e8m0fnu::isnan() const { + return x == 0b11111111; +} + +/// NOTE: we do not define comparisons directly and instead rely on the implicit +/// conversion from c10::Float8_e8m0fnu to float. + +} // namespace c10 + +namespace std { + +template <> +class numeric_limits { + public: + static constexpr bool is_specialized = true; + static constexpr bool is_signed = false; + static constexpr bool is_integer = false; + static constexpr bool is_exact = false; + static constexpr bool has_infinity = false; + static constexpr bool has_quiet_NaN = true; + static constexpr bool has_signaling_NaN = false; + static constexpr auto has_denorm = false; + static constexpr auto has_denorm_loss = false; + static constexpr auto round_style = numeric_limits::round_style; + static constexpr bool is_iec559 = false; + static constexpr bool is_bounded = true; + static constexpr bool is_modulo = false; + static constexpr int digits = 1; + static constexpr int digits10 = 0; + static constexpr int max_digits10 = 1; // just a 2! 
+ static constexpr int radix = 2; + static constexpr int min_exponent = -126; + static constexpr int min_exponent10 = -38; + static constexpr int max_exponent = 128; + static constexpr int max_exponent10 = 38; + static constexpr auto traps = numeric_limits::traps; + static constexpr auto tinyness_before = false; + + static constexpr c10::Float8_e8m0fnu min() { + // 2^-127 + return c10::Float8_e8m0fnu(0b00000000, c10::Float8_e8m0fnu::from_bits()); + } + static constexpr c10::Float8_e8m0fnu lowest() { + // 2^-127 + return c10::Float8_e8m0fnu(0b00000000, c10::Float8_e8m0fnu::from_bits()); + } + static constexpr c10::Float8_e8m0fnu max() { + // 254 biased, which is 127 unbiased, so 2^127 + return c10::Float8_e8m0fnu(0b11111110, c10::Float8_e8m0fnu::from_bits()); + } + static constexpr c10::Float8_e8m0fnu epsilon() { + // according to https://en.cppreference.com/w/cpp/types/numeric_limits, this + // is "the difference between 1.0 and the next representable value of the + // given floating-point type". The next representable value is 2.0, so the + // difference is 1.0 which is 2^0. 0 unbiased is 127 biased. + return c10::Float8_e8m0fnu(0b01111111, c10::Float8_e8m0fnu::from_bits()); + } + static constexpr c10::Float8_e8m0fnu round_error() { + // 0.5 in float, which is 2^-1, and -1 + 127 = 126 + return c10::Float8_e8m0fnu(0b01111110, c10::Float8_e8m0fnu::from_bits()); + } + static constexpr c10::Float8_e8m0fnu quiet_NaN() { + return c10::Float8_e8m0fnu(0b11111111, c10::Float8_e8m0fnu::from_bits()); + } +}; + +} // namespace std + +C10_CLANG_DIAGNOSTIC_POP() diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e8m0fnu.h b/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e8m0fnu.h new file mode 100644 index 0000000000000000000000000000000000000000..eae12895ef8e3c67de31992ba2291e2fcf3783a4 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Float8_e8m0fnu.h @@ -0,0 +1,120 @@ +#pragma once + +/// Defines the Float8_e8m0fnu type (8-bit floating-point) including +/// conversions to standard C types +/// Binary configuration : +/// eeeeeeee +/// no sign bits +/// 8 exponent bits +/// no mantissa bits +/// +/// This is the E8M0 dtype from the OCP MX format spec +/// (https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf, +/// Section 5.4.1) + +#include +#include +#include +#include + +// TODO(#146647): do we need to special case OPENCL? +#if defined(__cplusplus) +#include +#elif !defined(__OPENCL_VERSION__) +#include +#include +#endif + +#include +#include + +namespace c10 { + +namespace detail { + +/* + * Convert a 32-bit floating-point number in IEEE single-precision format to a + * 8-bit floating-point number in fp8 e8m0fnu format, in bit representation. 
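+ *
+ * The conversion rounds to the nearest exponent using the guard, round and
+ * sticky bits of the float32 mantissa (round-to-nearest, ties to even). As a
+ * worked example, 0.75f (biased exponent 126, guard bit set, round and
+ * sticky bits clear) rounds up to biased exponent 127 and encodes 2^0 = 1.0,
+ * while 0.5f encodes exactly as 126 (2^-1).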
+ */ +inline C10_HOST_DEVICE uint8_t fp8e8m0fnu_from_fp32_value(float f) { + // TODO(#146647): maybe rewrite without control flow + + uint32_t f_bits = c10::detail::fp32_to_bits(f); + + // extract the exponent + uint32_t exponent = (f_bits >> 23) & 0b11111111; + + // special case float32 NaN and +-inf to map to e8m0 nan + if (exponent == 0b11111111) { + return exponent; + } + + // next, we use guard, round, sticky bits and the LSB to implement round to + // nearest, with ties to even + + // guard bit - bit 23, or 22 zero-indexed + uint8_t g = (f_bits & 0x400000) > 0; + // round bit - bit 22, or 21 zero-indexed + uint8_t r = (f_bits & 0x200000) > 0; + // sticky bit - bits 21 to 1, or 20 to 0 zero-indexed + uint8_t s = (f_bits & 0x1FFFFF) > 0; + // in casting to e8m0, LSB is the implied mantissa bit. It equals to 0 if the + // original float32 is denormal, and to 1 if the original float32 is normal. + uint8_t lsb = exponent > 0; + + // implement the RNE logic + bool round_up = false; + + // if g == 0, round down (no-op) + if (g == 1) { + if ((r == 1) || (s == 1)) { + // round up + round_up = true; + } else { + if (lsb == 1) { + // round up + round_up = true; + } + // if lsb == 0, round down (no-op) + } + } + + if (round_up) { + // adjust exponent + // note that if exponent was 255 we would have already returned earlier, so + // we know we can add one safely without running out of bounds + exponent++; + } + + return exponent; +} + +} // namespace detail + +struct alignas(1) Float8_e8m0fnu { + uint8_t x; + + struct from_bits_t {}; + C10_HOST_DEVICE static constexpr from_bits_t from_bits() { + return from_bits_t(); + } + + Float8_e8m0fnu() = default; + + constexpr C10_HOST_DEVICE Float8_e8m0fnu(uint8_t bits, from_bits_t) + : x(bits) {} + inline C10_HOST_DEVICE Float8_e8m0fnu(float value); + inline C10_HOST_DEVICE operator float() const; + inline C10_HOST_DEVICE bool isnan() const; +}; + +C10_API inline std::ostream& operator<<( + std::ostream& out, + const Float8_e8m0fnu& value) { + out << (float)value; + return out; +} + +} // namespace c10 + +#include // IWYU pragma: keep diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Float8_fnuz_cvt.h b/phivenv/Lib/site-packages/torch/include/c10/util/Float8_fnuz_cvt.h new file mode 100644 index 0000000000000000000000000000000000000000..6afdb74dd521f2a4be8351bbaef0b02b2e0166f3 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Float8_fnuz_cvt.h @@ -0,0 +1,64 @@ +#pragma once + +#include + +#include + +#if defined(SYCL_LANGUAGE_VERSION) +#include +#endif + +namespace c10::detail { + +/* + * Convert a 8-bit floating-point number in either f8 E4M3FNUZ or bf8 E5M2FNUZ + * format, in bit representation, to a 32-bit floating-point number. 
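+ *
+ * For example, with we = 4 and wm = 3 (E4M3FNUZ, bias 8) the input 0x48
+ * decodes as biased exponent 9 with a zero mantissa, i.e. 2^(9-8) = 2.0f,
+ * while 0x80 is the single NaN encoding.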
+ */ +template +inline C10_HOST_DEVICE float fp8_fnuz_to_fp32_value(uint8_t x) { + static_assert((we == 4 && wm == 3) || (we == 5 && wm == 2)); + constexpr uint32_t weo = 8; + constexpr uint32_t wmo = 23; + + if (x == 0) { + return 0; + } + + if (x == 0x80) { + constexpr uint32_t ifNaN = 0x7F800001; + return fp32_from_bits(ifNaN); + } + + uint32_t mantissa = x & ((1 << wm) - 1); + uint32_t exponent = (x & 0x7F) >> wm; + + // subnormal input + if (exponent == 0) { + // guaranteed mantissa!=0 since cases 0x0 and 0x80 are handled above +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) + uint32_t renorm_shift = __clz(mantissa); +#elif defined(__SYCL_DEVICE_ONLY__) + uint32_t renorm_shift = sycl::clz(mantissa); +#elif defined(_MSC_VER) + unsigned long nonsign_bsr; + _BitScanReverse(&nonsign_bsr, (unsigned long)mantissa); + uint32_t renorm_shift = (uint32_t)nonsign_bsr ^ 31; +#else + uint32_t renorm_shift = __builtin_clz(mantissa); +#endif + uint32_t sh = 1 + renorm_shift - (32 - wm); + mantissa <<= sh; + exponent += 1 - sh; + mantissa &= ((1 << wm) - 1); + } + + const uint32_t exp_low_cutoff = (1 << (weo - 1)) - (1 << (we - 1)); + exponent += exp_low_cutoff - 1; + mantissa <<= wmo - wm; + + uint32_t sign = x >> 7; + uint32_t retval = (sign << 31) | (exponent << 23) | mantissa; + return fp32_from_bits(retval); +} + +} // namespace c10::detail diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/FunctionRef.h b/phivenv/Lib/site-packages/torch/include/c10/util/FunctionRef.h new file mode 100644 index 0000000000000000000000000000000000000000..aac5e11d99e13bd33431e25e68158a08a8de1223 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/FunctionRef.h @@ -0,0 +1,73 @@ +//===- llvm/ADT/STLExtras.h - Useful STL related functions ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains some templates that are useful if you are working with the +// STL at all. +// +// No library is required when using these functions. +// +//===----------------------------------------------------------------------===// + +// c10: modified from llvm::function_ref +// c10: added more SFINAE to enable use in overloaded functions + +#pragma once + +#include +#include +#include + +namespace c10 { + +/// An efficient, type-erasing, non-owning reference to a callable. This is +/// intended for use as the type of a function parameter that is not used +/// after the function in question returns. +/// +/// This class does not own the callable, so it is not in general safe to store +/// a function_ref. +template +class function_ref; + +template +class function_ref { + Ret (*callback)(intptr_t callable, Params... params) = nullptr; + intptr_t callable{}; + + template + static Ret callback_fn(intptr_t callable, Params... params) { + return (*reinterpret_cast(callable))( + std::forward(params)...); + } + + public: + function_ref() = default; + function_ref(std::nullptr_t) {} + + template + function_ref( + // NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward) + Callable&& callable, + std::enable_if_t< + !std::is_same_v, function_ref>>* = + nullptr, + std::enable_if_t, + Ret>>* = nullptr) + : callback(callback_fn>), + callable(reinterpret_cast(&callable)) {} + + Ret operator()(Params... 
params) const { + return callback(callable, std::forward(params)...); + } + + operator bool() const { + return callback; + } +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Gauge.h b/phivenv/Lib/site-packages/torch/include/c10/util/Gauge.h new file mode 100644 index 0000000000000000000000000000000000000000..8ae7fdad94428b8f49ef3770b71bfa400fed7512 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Gauge.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include + +#include +#include + +namespace c10::monitor { +namespace detail { + +class GaugeImpl; + +class GaugeBackendIf { + public: + virtual ~GaugeBackendIf() = default; + virtual void record(int64_t value) noexcept = 0; +}; + +class GaugeBackendFactoryIf { + public: + virtual ~GaugeBackendFactoryIf() = default; + + // May return nullptr if the gauge will be ignored by the given backend. + virtual std::unique_ptr create( + std::string_view key) noexcept = 0; +}; + +void C10_API registerGaugeBackend(std::unique_ptr); +} // namespace detail + +// A handle to a Gauge. +class C10_API GaugeHandle { + public: + explicit GaugeHandle(std::string_view key); + void record(int64_t value); + + private: + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + detail::GaugeImpl& impl_; +}; + +} // namespace c10::monitor + +#define STATIC_GAUGE(_key) \ + []() -> ::c10::monitor::GaugeHandle& { \ + static ::c10::monitor::GaugeHandle handle(#_key); \ + return handle; \ + }() diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Half-inl.h b/phivenv/Lib/site-packages/torch/include/c10/util/Half-inl.h new file mode 100644 index 0000000000000000000000000000000000000000..98df5f8d0d5f734471853b0fd5c5a4ee2b56627b --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Half-inl.h @@ -0,0 +1,350 @@ +#pragma once + +#include +#include + +#include +#include + +#ifdef __CUDACC__ +#include +#endif + +#ifdef __HIPCC__ +#include +#endif + +#if defined(CL_SYCL_LANGUAGE_VERSION) +#include // for SYCL 1.2.1 +#elif defined(SYCL_LANGUAGE_VERSION) +#include // for SYCL 2020 +#endif + +#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \ + !defined(__APPLE__) +#include +#endif + +C10_CLANG_DIAGNOSTIC_PUSH() +#if C10_CLANG_HAS_WARNING("-Wimplicit-int-float-conversion") +C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion") +#endif + +namespace c10 { + +#if defined(__aarch64__) && !defined(__CUDACC__) +/// Constructors +inline Half::Half(float16_t value) : x(detail::fp16_to_bits(value)) {} +inline Half::operator float16_t() const { + return detail::fp16_from_bits(x); +} +#else + +inline C10_HOST_DEVICE Half::Half(float value) + : +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) + x(__half_as_short(__float2half(value))) +#elif defined(__SYCL_DEVICE_ONLY__) + x(c10::bit_cast(sycl::half(value))) +#elif (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \ + !defined(__APPLE__) + x(at::vec::float2half_scalar(value)) +#else + x(detail::fp16_ieee_from_fp32_value(value)) +#endif +{ +} + +/// Implicit conversions + +inline C10_HOST_DEVICE Half::operator float() const { +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) + return __half2float(*reinterpret_cast(&x)); +#elif defined(__SYCL_DEVICE_ONLY__) + return float(c10::bit_cast(x)); +#elif (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \ + !defined(__APPLE__) + return at::vec::half2float_scalar(x); +#elif defined(__aarch64__) && !defined(__CUDACC__) + 
return detail::native_fp16_to_fp32_value(x); +#else + return detail::fp16_ieee_to_fp32_value(x); +#endif +} + +#endif /* !defined(__aarch64__) || defined(__CUDACC__) \ + */ + +#if defined(__CUDACC__) || defined(__HIPCC__) +inline C10_HOST_DEVICE Half::Half(const __half& value) { + x = *reinterpret_cast(&value); +} +inline C10_HOST_DEVICE Half::operator __half() const { + return *reinterpret_cast(&x); +} +#endif + +#ifdef SYCL_LANGUAGE_VERSION +inline C10_HOST_DEVICE Half::Half(const sycl::half& value) { + x = *reinterpret_cast(&value); +} +inline C10_HOST_DEVICE Half::operator sycl::half() const { + return *reinterpret_cast(&x); +} +#endif + +// CUDA intrinsics + +#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 350)) || \ + (defined(__clang__) && defined(__CUDA__)) +inline __device__ Half __ldg(const Half* ptr) { + return __ldg(reinterpret_cast(ptr)); +} +#endif + +/// Arithmetic + +inline C10_HOST_DEVICE Half operator+(const Half& a, const Half& b) { + return static_cast(a) + static_cast(b); +} + +inline C10_HOST_DEVICE Half operator-(const Half& a, const Half& b) { + return static_cast(a) - static_cast(b); +} + +inline C10_HOST_DEVICE Half operator*(const Half& a, const Half& b) { + return static_cast(a) * static_cast(b); +} + +inline C10_HOST_DEVICE Half operator/(const Half& a, const Half& b) + __ubsan_ignore_float_divide_by_zero__ { + return static_cast(a) / static_cast(b); +} + +inline C10_HOST_DEVICE Half operator-(const Half& a) { +#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530) || \ + defined(__HIP_DEVICE_COMPILE__) + return __hneg(a); +#elif defined(__SYCL_DEVICE_ONLY__) + return -c10::bit_cast(a); +#else + return -static_cast(a); +#endif +} + +inline C10_HOST_DEVICE Half& operator+=(Half& a, const Half& b) { + a = a + b; + return a; +} + +inline C10_HOST_DEVICE Half& operator-=(Half& a, const Half& b) { + a = a - b; + return a; +} + +inline C10_HOST_DEVICE Half& operator*=(Half& a, const Half& b) { + a = a * b; + return a; +} + +inline C10_HOST_DEVICE Half& operator/=(Half& a, const Half& b) { + a = a / b; + return a; +} + +/// Arithmetic with floats + +inline C10_HOST_DEVICE float operator+(Half a, float b) { + return static_cast(a) + b; +} +inline C10_HOST_DEVICE float operator-(Half a, float b) { + return static_cast(a) - b; +} +inline C10_HOST_DEVICE float operator*(Half a, float b) { + return static_cast(a) * b; +} +inline C10_HOST_DEVICE float operator/(Half a, float b) + __ubsan_ignore_float_divide_by_zero__ { + return static_cast(a) / b; +} + +inline C10_HOST_DEVICE float operator+(float a, Half b) { + return a + static_cast(b); +} +inline C10_HOST_DEVICE float operator-(float a, Half b) { + return a - static_cast(b); +} +inline C10_HOST_DEVICE float operator*(float a, Half b) { + return a * static_cast(b); +} +inline C10_HOST_DEVICE float operator/(float a, Half b) + __ubsan_ignore_float_divide_by_zero__ { + return a / static_cast(b); +} + +inline C10_HOST_DEVICE float& operator+=(float& a, const Half& b) { + return a += static_cast(b); +} +inline C10_HOST_DEVICE float& operator-=(float& a, const Half& b) { + return a -= static_cast(b); +} +inline C10_HOST_DEVICE float& operator*=(float& a, const Half& b) { + return a *= static_cast(b); +} +inline C10_HOST_DEVICE float& operator/=(float& a, const Half& b) { + return a /= static_cast(b); +} + +/// Arithmetic with doubles + +inline C10_HOST_DEVICE double operator+(Half a, double b) { + return static_cast(a) + b; +} +inline C10_HOST_DEVICE double operator-(Half a, double b) { + return static_cast(a) - b; +} +inline 
+inline C10_HOST_DEVICE double operator*(Half a, double b) {
+  return static_cast<double>(a) * b;
+}
+inline C10_HOST_DEVICE double operator/(Half a, double b)
+    __ubsan_ignore_float_divide_by_zero__ {
+  return static_cast<double>(a) / b;
+}
+
+inline C10_HOST_DEVICE double operator+(double a, Half b) {
+  return a + static_cast<double>(b);
+}
+inline C10_HOST_DEVICE double operator-(double a, Half b) {
+  return a - static_cast<double>(b);
+}
+inline C10_HOST_DEVICE double operator*(double a, Half b) {
+  return a * static_cast<double>(b);
+}
+inline C10_HOST_DEVICE double operator/(double a, Half b)
+    __ubsan_ignore_float_divide_by_zero__ {
+  return a / static_cast<double>(b);
+}
+
+/// Arithmetic with ints
+
+inline C10_HOST_DEVICE Half operator+(Half a, int b) {
+  return a + static_cast<Half>(b);
+}
+inline C10_HOST_DEVICE Half operator-(Half a, int b) {
+  return a - static_cast<Half>(b);
+}
+inline C10_HOST_DEVICE Half operator*(Half a, int b) {
+  return a * static_cast<Half>(b);
+}
+inline C10_HOST_DEVICE Half operator/(Half a, int b) {
+  return a / static_cast<Half>(b);
+}
+
+inline C10_HOST_DEVICE Half operator+(int a, Half b) {
+  return static_cast<Half>(a) + b;
+}
+inline C10_HOST_DEVICE Half operator-(int a, Half b) {
+  return static_cast<Half>(a) - b;
+}
+inline C10_HOST_DEVICE Half operator*(int a, Half b) {
+  return static_cast<Half>(a) * b;
+}
+inline C10_HOST_DEVICE Half operator/(int a, Half b) {
+  return static_cast<Half>(a) / b;
+}
+
+/// Arithmetic with int64_t
+
+inline C10_HOST_DEVICE Half operator+(Half a, int64_t b) {
+  return a + static_cast<Half>(b);
+}
+inline C10_HOST_DEVICE Half operator-(Half a, int64_t b) {
+  return a - static_cast<Half>(b);
+}
+inline C10_HOST_DEVICE Half operator*(Half a, int64_t b) {
+  return a * static_cast<Half>(b);
+}
+inline C10_HOST_DEVICE Half operator/(Half a, int64_t b) {
+  return a / static_cast<Half>(b);
+}
+
+inline C10_HOST_DEVICE Half operator+(int64_t a, Half b) {
+  return static_cast<Half>(a) + b;
+}
+inline C10_HOST_DEVICE Half operator-(int64_t a, Half b) {
+  return static_cast<Half>(a) - b;
+}
+inline C10_HOST_DEVICE Half operator*(int64_t a, Half b) {
+  return static_cast<Half>(a) * b;
+}
+inline C10_HOST_DEVICE Half operator/(int64_t a, Half b) {
+  return static_cast<Half>(a) / b;
+}
+
+/// NOTE: we do not define comparisons directly and instead rely on the
+/// implicit conversion from c10::Half to float.
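+
+// Editor's illustration (not part of the upstream header): because no
+// comparison operators are declared here, an expression such as `a < b`
+// compiles via the implicit Half -> float conversion above, so comparisons
+// follow float semantics (including NaN behavior):
+//
+//   c10::Half a(1.0f), b(2.0f);
+//   bool lt = a < b;            // both operands convert to float
+//   bool eq = (a + b) == 3.0f;  // arithmetic also runs in float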
+
+} // namespace c10
+
+namespace std {
+
+template <>
+class numeric_limits<c10::Half> {
+ public:
+  static constexpr bool is_specialized = true;
+  static constexpr bool is_signed = true;
+  static constexpr bool is_integer = false;
+  static constexpr bool is_exact = false;
+  static constexpr bool has_infinity = true;
+  static constexpr bool has_quiet_NaN = true;
+  static constexpr bool has_signaling_NaN = true;
+  static constexpr auto has_denorm = numeric_limits<float>::has_denorm;
+  static constexpr auto has_denorm_loss =
+      numeric_limits<float>::has_denorm_loss;
+  static constexpr auto round_style = numeric_limits<float>::round_style;
+  static constexpr bool is_iec559 = true;
+  static constexpr bool is_bounded = true;
+  static constexpr bool is_modulo = false;
+  static constexpr int digits = 11;
+  static constexpr int digits10 = 3;
+  static constexpr int max_digits10 = 5;
+  static constexpr int radix = 2;
+  static constexpr int min_exponent = -13;
+  static constexpr int min_exponent10 = -4;
+  static constexpr int max_exponent = 16;
+  static constexpr int max_exponent10 = 4;
+  static constexpr auto traps = numeric_limits<float>::traps;
+  static constexpr auto tinyness_before =
+      numeric_limits<float>::tinyness_before;
+  static constexpr c10::Half min() {
+    return c10::Half(0x0400, c10::Half::from_bits());
+  }
+  static constexpr c10::Half lowest() {
+    return c10::Half(0xFBFF, c10::Half::from_bits());
+  }
+  static constexpr c10::Half max() {
+    return c10::Half(0x7BFF, c10::Half::from_bits());
+  }
+  static constexpr c10::Half epsilon() {
+    return c10::Half(0x1400, c10::Half::from_bits());
+  }
+  static constexpr c10::Half round_error() {
+    return c10::Half(0x3800, c10::Half::from_bits());
+  }
+  static constexpr c10::Half infinity() {
+    return c10::Half(0x7C00, c10::Half::from_bits());
+  }
+  static constexpr c10::Half quiet_NaN() {
+    return c10::Half(0x7E00, c10::Half::from_bits());
+  }
+  static constexpr c10::Half signaling_NaN() {
+    return c10::Half(0x7D00, c10::Half::from_bits());
+  }
+  static constexpr c10::Half denorm_min() {
+    return c10::Half(0x0001, c10::Half::from_bits());
+  }
+};
+
+} // namespace std
+
+C10_CLANG_DIAGNOSTIC_POP()
diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Half.h b/phivenv/Lib/site-packages/torch/include/c10/util/Half.h
new file mode 100644
index 0000000000000000000000000000000000000000..82a77ff0af93292866ae5c841ab423c9b5fae56a
--- /dev/null
+++ b/phivenv/Lib/site-packages/torch/include/c10/util/Half.h
@@ -0,0 +1,424 @@
+#pragma once
+
+/// Defines the Half type (half-precision floating-point) including conversions
+/// to standard C types and basic arithmetic operations. Note that arithmetic
+/// operations are implemented by converting to floating point and
+/// performing the operation in float32, instead of using CUDA half intrinsics.
+/// Most uses of this type within ATen are memory bound, including the
+/// element-wise kernels, and the half intrinsics aren't efficient on all GPUs.
+/// If you are writing a compute bound kernel, you can use the CUDA half
+/// intrinsics directly on the Half type from device code.
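+
+// A minimal usage sketch (editor's illustration, not upstream code). The
+// 11-bit significand gives roughly 3 decimal digits of precision, so values
+// are rounded on storage and arithmetic round-trips through float:
+//
+//   c10::Half h = 0.1f;     // stored as the nearest fp16 value
+//   float back = h;         // ~0.0999756, not exactly 0.1
+//   c10::Half s = h + h;    // computed in float, rounded back to Half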
+ +#include +#include +#include +#include +#include + +#if defined(__cplusplus) +#include +#elif !defined(__OPENCL_VERSION__) +#include +#endif + +#ifdef _MSC_VER +#include +#endif + +#include +#include +#include +#include +#include + +#ifdef __CUDACC__ +#include +#endif + +#ifdef __HIPCC__ +#include +#endif + +#if defined(CL_SYCL_LANGUAGE_VERSION) +#include // for SYCL 1.2.1 +#elif defined(SYCL_LANGUAGE_VERSION) +#include // for SYCL 2020 +#endif + +#if defined(__aarch64__) && !defined(__CUDACC__) +#include +#endif + +#if defined(__GNUC__) || defined(__clang__) +#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || \ + defined(_M_IX86) +#if defined(__F16C__) && \ + !(defined(__CUDA_ARCH__) || defined(__CUDACC__) || \ + defined(__HIP_DEVICE_COMPILE__)) +#define C10_X86_F16 1 +#include // import conversion ops from f16cintrin.h +#endif // defined(__F16C__) && !(defined(__CUDA_ARCH__) || defined(__CUDACC__) + // || defined(__HIP_DEVICE_COMPILE__)) +#endif // __x86_64__ || _M_X64 || __i386 || _M_IX86 +#endif // __GNUC__ || __clang__ + +namespace c10 { + +namespace detail { + +/* + * Convert a 16-bit floating-point number in IEEE half-precision format, in bit + * representation, to a 32-bit floating-point number in IEEE single-precision + * format, in bit representation. + * + * @note The implementation doesn't use any floating-point operations. + */ +inline uint32_t fp16_ieee_to_fp32_bits(uint16_t h) { + /* + * Extend the half-precision floating-point number to 32 bits and shift to the + * upper part of the 32-bit word: + * +---+-----+------------+-------------------+ + * | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000| + * +---+-----+------------+-------------------+ + * Bits 31 26-30 16-25 0-15 + * + * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 + * - zero bits. + */ + const uint32_t w = (uint32_t)h << 16; + /* + * Extract the sign of the input number into the high bit of the 32-bit word: + * + * +---+----------------------------------+ + * | S |0000000 00000000 00000000 00000000| + * +---+----------------------------------+ + * Bits 31 0-31 + */ + const uint32_t sign = w & UINT32_C(0x80000000); + /* + * Extract mantissa and biased exponent of the input number into the bits 0-30 + * of the 32-bit word: + * + * +---+-----+------------+-------------------+ + * | 0 |EEEEE|MM MMMM MMMM|0000 0000 0000 0000| + * +---+-----+------------+-------------------+ + * Bits 30 27-31 17-26 0-16 + */ + const uint32_t nonsign = w & UINT32_C(0x7FFFFFFF); + /* + * Renorm shift is the number of bits to shift mantissa left to make the + * half-precision number normalized. If the initial number is normalized, some + * of its high 6 bits (sign == 0 and 5-bit exponent) equals one. In this case + * renorm_shift == 0. If the number is denormalize, renorm_shift > 0. Note + * that if we shift denormalized nonsign by renorm_shift, the unit bit of + * mantissa will shift into exponent, turning the biased exponent into 1, and + * making mantissa normalized (i.e. without leading 1). + */ +#ifdef _MSC_VER + unsigned long nonsign_bsr; + _BitScanReverse(&nonsign_bsr, (unsigned long)nonsign); + uint32_t renorm_shift = (uint32_t)nonsign_bsr ^ 31; +#else + uint32_t renorm_shift = __builtin_clz(nonsign); +#endif + renorm_shift = renorm_shift > 5 ? renorm_shift - 5 : 0; + /* + * Iff half-precision number has exponent of 15, the addition overflows + * it into bit 31, and the subsequent shift turns the high 9 bits + * into 1. 
Thus inf_nan_mask == 0x7F800000 if the half-precision number + * had exponent of 15 (i.e. was NaN or infinity) 0x00000000 otherwise + */ + const int32_t inf_nan_mask = + ((int32_t)(nonsign + 0x04000000) >> 8) & INT32_C(0x7F800000); + /* + * Iff nonsign is 0, it overflows into 0xFFFFFFFF, turning bit 31 + * into 1. Otherwise, bit 31 remains 0. The signed shift right by 31 + * broadcasts bit 31 into all bits of the zero_mask. Thus zero_mask == + * 0xFFFFFFFF if the half-precision number was zero (+0.0h or -0.0h) + * 0x00000000 otherwise + */ + const int32_t zero_mask = (int32_t)(nonsign - 1) >> 31; + /* + * 1. Shift nonsign left by renorm_shift to normalize it (if the input + * was denormal) + * 2. Shift nonsign right by 3 so the exponent (5 bits originally) + * becomes an 8-bit field and 10-bit mantissa shifts into the 10 high + * bits of the 23-bit mantissa of IEEE single-precision number. + * 3. Add 0x70 to the exponent (starting at bit 23) to compensate the + * different in exponent bias (0x7F for single-precision number less 0xF + * for half-precision number). + * 4. Subtract renorm_shift from the exponent (starting at bit 23) to + * account for renormalization. As renorm_shift is less than 0x70, this + * can be combined with step 3. + * 5. Binary OR with inf_nan_mask to turn the exponent into 0xFF if the + * input was NaN or infinity. + * 6. Binary ANDNOT with zero_mask to turn the mantissa and exponent + * into zero if the input was zero. + * 7. Combine with the sign of the input number. + */ + return sign | + ((((nonsign << renorm_shift >> 3) + ((0x70 - renorm_shift) << 23)) | + inf_nan_mask) & + ~zero_mask); +} + +/* + * Convert a 16-bit floating-point number in IEEE half-precision format, in bit + * representation, to a 32-bit floating-point number in IEEE single-precision + * format. + * + * @note The implementation relies on IEEE-like (no assumption about rounding + * mode and no operations on denormals) floating-point operations and bitcasts + * between integer and floating-point variables. + */ +C10_HOST_DEVICE inline float fp16_ieee_to_fp32_value(uint16_t h) { +#ifdef C10_X86_F16 + return _cvtsh_ss(h); +#else + /* + * Extend the half-precision floating-point number to 32 bits and shift to the + * upper part of the 32-bit word: + * +---+-----+------------+-------------------+ + * | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000| + * +---+-----+------------+-------------------+ + * Bits 31 26-30 16-25 0-15 + * + * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 + * - zero bits. 
+ */ + const uint32_t w = (uint32_t)h << 16; + /* + * Extract the sign of the input number into the high bit of the 32-bit word: + * + * +---+----------------------------------+ + * | S |0000000 00000000 00000000 00000000| + * +---+----------------------------------+ + * Bits 31 0-31 + */ + const uint32_t sign = w & UINT32_C(0x80000000); + /* + * Extract mantissa and biased exponent of the input number into the high bits + * of the 32-bit word: + * + * +-----+------------+---------------------+ + * |EEEEE|MM MMMM MMMM|0 0000 0000 0000 0000| + * +-----+------------+---------------------+ + * Bits 27-31 17-26 0-16 + */ + const uint32_t two_w = w + w; + + /* + * Shift mantissa and exponent into bits 23-28 and bits 13-22 so they become + * mantissa and exponent of a single-precision floating-point number: + * + * S|Exponent | Mantissa + * +-+---+-----+------------+----------------+ + * |0|000|EEEEE|MM MMMM MMMM|0 0000 0000 0000| + * +-+---+-----+------------+----------------+ + * Bits | 23-31 | 0-22 + * + * Next, there are some adjustments to the exponent: + * - The exponent needs to be corrected by the difference in exponent bias + * between single-precision and half-precision formats (0x7F - 0xF = 0x70) + * - Inf and NaN values in the inputs should become Inf and NaN values after + * conversion to the single-precision number. Therefore, if the biased + * exponent of the half-precision input was 0x1F (max possible value), the + * biased exponent of the single-precision output must be 0xFF (max possible + * value). We do this correction in two steps: + * - First, we adjust the exponent by (0xFF - 0x1F) = 0xE0 (see exp_offset + * below) rather than by 0x70 suggested by the difference in the exponent bias + * (see above). + * - Then we multiply the single-precision result of exponent adjustment by + * 2**(-112) to reverse the effect of exponent adjustment by 0xE0 less the + * necessary exponent adjustment by 0x70 due to difference in exponent bias. + * The floating-point multiplication hardware would ensure than Inf and + * NaN would retain their value on at least partially IEEE754-compliant + * implementations. + * + * Note that the above operations do not handle denormal inputs (where biased + * exponent == 0). However, they also do not operate on denormal inputs, and + * do not produce denormal results. + */ + constexpr uint32_t exp_offset = UINT32_C(0xE0) << 23; + // const float exp_scale = 0x1.0p-112f; + constexpr uint32_t scale_bits = (uint32_t)15 << 23; + float exp_scale_val = 0; +#if defined(_MSC_VER) && defined(__clang__) + __builtin_memcpy(&exp_scale_val, &scale_bits, sizeof(exp_scale_val)); +#else + std::memcpy(&exp_scale_val, &scale_bits, sizeof(exp_scale_val)); +#endif + + const float exp_scale = exp_scale_val; + const float normalized_value = + fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale; + + /* + * Convert denormalized half-precision inputs into single-precision results + * (always normalized). Zero inputs are also handled here. + * + * In a denormalized number the biased exponent is zero, and mantissa has + * on-zero bits. First, we shift mantissa into bits 0-9 of the 32-bit word. + * + * zeros | mantissa + * +---------------------------+------------+ + * |0000 0000 0000 0000 0000 00|MM MMMM MMMM| + * +---------------------------+------------+ + * Bits 10-31 0-9 + * + * Now, remember that denormalized half-precision numbers are represented as: + * FP16 = mantissa * 2**(-24). 
+ * The trick is to construct a normalized single-precision number with the + * same mantissa and thehalf-precision input and with an exponent which would + * scale the corresponding mantissa bits to 2**(-24). A normalized + * single-precision floating-point number is represented as: FP32 = (1 + + * mantissa * 2**(-23)) * 2**(exponent - 127) Therefore, when the biased + * exponent is 126, a unit change in the mantissa of the input denormalized + * half-precision number causes a change of the constructed single-precision + * number by 2**(-24), i.e. the same amount. + * + * The last step is to adjust the bias of the constructed single-precision + * number. When the input half-precision number is zero, the constructed + * single-precision number has the value of FP32 = 1 * 2**(126 - 127) = + * 2**(-1) = 0.5 Therefore, we need to subtract 0.5 from the constructed + * single-precision number to get the numerical equivalent of the input + * half-precision number. + */ + constexpr uint32_t magic_mask = UINT32_C(126) << 23; + constexpr float magic_bias = 0.5f; + const float denormalized_value = + fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias; + + /* + * - Choose either results of conversion of input as a normalized number, or + * as a denormalized number, depending on the input exponent. The variable + * two_w contains input exponent in bits 27-31, therefore if its smaller than + * 2**27, the input is either a denormal number, or zero. + * - Combine the result of conversion of exponent and mantissa with the sign + * of the input number. + */ + constexpr uint32_t denormalized_cutoff = UINT32_C(1) << 27; + const uint32_t result = sign | + (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) + : fp32_to_bits(normalized_value)); + return fp32_from_bits(result); +#endif // C10_X86_F16 +} + +/* + * Convert a 32-bit floating-point number in IEEE single-precision format to a + * 16-bit floating-point number in IEEE half-precision format, in bit + * representation. + * + * @note The implementation relies on IEEE-like (no assumption about rounding + * mode and no operations on denormals) floating-point operations and bitcasts + * between integer and floating-point variables. + */ +inline uint16_t fp16_ieee_from_fp32_value(float f) { +#ifdef C10_X86_F16 + return _cvtss_sh(f, _MM_FROUND_TO_NEAREST_INT); +#else + // const float scale_to_inf = 0x1.0p+112f; + // const float scale_to_zero = 0x1.0p-110f; + constexpr uint32_t scale_to_inf_bits = (uint32_t)239 << 23; + constexpr uint32_t scale_to_zero_bits = (uint32_t)17 << 23; + float scale_to_inf_val = 0, scale_to_zero_val = 0; + std::memcpy(&scale_to_inf_val, &scale_to_inf_bits, sizeof(scale_to_inf_val)); + std::memcpy( + &scale_to_zero_val, &scale_to_zero_bits, sizeof(scale_to_zero_val)); + const float scale_to_inf = scale_to_inf_val; + const float scale_to_zero = scale_to_zero_val; + +#if defined(_MSC_VER) && _MSC_VER == 1916 + float base = ((signbit(f) != 0 ? 
-f : f) * scale_to_inf) * scale_to_zero; +#else + float base = (fabsf(f) * scale_to_inf) * scale_to_zero; +#endif + + const uint32_t w = fp32_to_bits(f); + const uint32_t shl1_w = w + w; + const uint32_t sign = w & UINT32_C(0x80000000); + uint32_t bias = shl1_w & UINT32_C(0xFF000000); + if (bias < UINT32_C(0x71000000)) { + bias = UINT32_C(0x71000000); + } + + base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base; + const uint32_t bits = fp32_to_bits(base); + const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00); + const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF); + const uint32_t nonsign = exp_bits + mantissa_bits; + return static_cast( + (sign >> 16) | + (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign)); +#endif // C10_X86_F16 +} + +#ifdef C10_X86_F16 +#undef C10_X86_F16 +#endif // C10_X86_F16 + +#if defined(__aarch64__) && !defined(__CUDACC__) +inline float16_t fp16_from_bits(uint16_t h) { + return c10::bit_cast(h); +} + +inline uint16_t fp16_to_bits(float16_t f) { + return c10::bit_cast(f); +} + +// According to https://godbolt.org/z/frExdbsWG it would translate to single +// fcvt s0, h0 +inline float native_fp16_to_fp32_value(uint16_t h) { + return static_cast(fp16_from_bits(h)); +} + +inline uint16_t native_fp16_from_fp32_value(float f) { + return fp16_to_bits(static_cast(f)); +} +#endif + +} // namespace detail + +struct alignas(2) Half { + unsigned short x; + + struct from_bits_t {}; + C10_HOST_DEVICE static constexpr from_bits_t from_bits() { + return from_bits_t(); + } + + // HIP wants __host__ __device__ tag, CUDA does not +#if defined(USE_ROCM) + C10_HOST_DEVICE Half() = default; +#else + Half() = default; +#endif + + constexpr C10_HOST_DEVICE Half(unsigned short bits, from_bits_t) : x(bits) {} +#if defined(__aarch64__) && !defined(__CUDACC__) + inline Half(float16_t value); + inline operator float16_t() const; +#else + inline C10_HOST_DEVICE Half(float value); + inline C10_HOST_DEVICE operator float() const; +#endif + +#if defined(__CUDACC__) || defined(__HIPCC__) + inline C10_HOST_DEVICE Half(const __half& value); + inline C10_HOST_DEVICE operator __half() const; +#endif +#ifdef SYCL_LANGUAGE_VERSION + inline C10_HOST_DEVICE Half(const sycl::half& value); + inline C10_HOST_DEVICE operator sycl::half() const; +#endif +}; + +C10_API inline std::ostream& operator<<(std::ostream& out, const Half& value) { + out << (float)value; + return out; +} + +} // namespace c10 + +#include // IWYU pragma: keep diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/IdWrapper.h b/phivenv/Lib/site-packages/torch/include/c10/util/IdWrapper.h new file mode 100644 index 0000000000000000000000000000000000000000..086f456fc27ab1866a5c4ad9f9e651eed91ada68 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/IdWrapper.h @@ -0,0 +1,77 @@ +#pragma once + +#include +#include +#include + +namespace c10 { + +/** + * This template simplifies generation of simple classes that wrap an id + * in a typesafe way. Namely, you can use it to create a very lightweight + * type that only offers equality comparators and hashing. Example: + * + * struct MyIdType final : IdWrapper { + * constexpr explicit MyIdType(uint32_t id): IdWrapper(id) {} + * }; + * + * Then in the global top level namespace: + * + * C10_DEFINE_HASH_FOR_IDWRAPPER(MyIdType); + * + * That's it - equality operators and hash functions are automatically defined + * for you, given the underlying type supports it. 
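+ *
+ * A usage sketch (illustrative; MyIdType is the hypothetical type defined
+ * above):
+ *
+ *   MyIdType a(1), b(1);
+ *   assert(a == b);                       // operator== comes from IdWrapper
+ *   std::unordered_map<MyIdType, int> m;  // hashing via the macro above
+ *   m[a] = 42;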
+ */ +template +class IdWrapper { + public: + using underlying_type = UnderlyingType; + using concrete_type = ConcreteType; + + protected: + constexpr explicit IdWrapper(underlying_type id) noexcept( + noexcept(underlying_type(std::declval()))) + : id_(id) {} + + constexpr underlying_type underlyingId() const + noexcept(noexcept(underlying_type(std::declval()))) { + return id_; + } + + private: + friend size_t hash_value(const concrete_type& v) { + return std::hash()(v.id_); + } + + // TODO Making operator== noexcept if underlying type is noexcept equality + // comparable doesn't work with GCC 4.8. + // Fix this once we don't need GCC 4.8 anymore. + friend constexpr bool operator==( + const concrete_type& lhs, + const concrete_type& rhs) noexcept { + return lhs.id_ == rhs.id_; + } + + // TODO Making operator!= noexcept if operator== is noexcept doesn't work with + // GCC 4.8. + // Fix this once we don't need GCC 4.8 anymore. + friend constexpr bool operator!=( + const concrete_type& lhs, + const concrete_type& rhs) noexcept { + return !(lhs == rhs); + } + + underlying_type id_; +}; + +} // namespace c10 + +#define C10_DEFINE_HASH_FOR_IDWRAPPER(ClassName) \ + namespace std { \ + template <> \ + struct hash { \ + size_t operator()(ClassName x) const { \ + return hash_value(x); \ + } \ + }; \ + } diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/IntrusiveList.h b/phivenv/Lib/site-packages/torch/include/c10/util/IntrusiveList.h new file mode 100644 index 0000000000000000000000000000000000000000..af298e3e6946941d92584504a4fb5e81c399742d --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/IntrusiveList.h @@ -0,0 +1,206 @@ +#pragma once + +#include + +namespace c10 { + +template +class IntrusiveList; + +class IntrusiveListHook { + template + friend class ListIterator; + + template + friend class IntrusiveList; + + IntrusiveListHook* next_{nullptr}; + IntrusiveListHook* prev_{nullptr}; + + void link_before(IntrusiveListHook* next_node) { + next_ = next_node; + prev_ = next_node->prev_; + next_node->prev_ = this; + prev_->next_ = this; + } + + public: + IntrusiveListHook() : next_(this), prev_(this) {} + + IntrusiveListHook(const IntrusiveListHook&) = delete; + IntrusiveListHook& operator=(const IntrusiveListHook&) = delete; + IntrusiveListHook(IntrusiveListHook&&) = delete; + IntrusiveListHook& operator=(IntrusiveListHook&&) = delete; + + void unlink() { + TORCH_CHECK(is_linked()); + next_->prev_ = prev_; + prev_->next_ = next_; + next_ = this; + prev_ = this; + } + + ~IntrusiveListHook() { + if (is_linked()) { + unlink(); + } + } + + bool is_linked() const { + return next_ != this; + } +}; + +template +class ListIterator { + static_assert(std::is_same_v, IntrusiveListHook>); + static_assert(std::is_base_of_v); + P* ptr_; + + friend class IntrusiveList; + + public: + using iterator_category = std::bidirectional_iterator_tag; + using value_type = std::conditional_t, const T, T>; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + explicit ListIterator(P* ptr) : ptr_(ptr) {} + ~ListIterator() = default; + + ListIterator(const ListIterator&) = default; + ListIterator& operator=(const ListIterator&) = default; + ListIterator(ListIterator&&) = default; + ListIterator& operator=(ListIterator&&) = default; + + template < + typename Q, + class = std::enable_if_t && !std::is_const_v>> + ListIterator(const ListIterator& rhs) : ptr_(rhs.ptr_) {} + + template < + typename Q, + class = std::enable_if_t && !std::is_const_v>> 
+ ListIterator& operator=(const ListIterator& rhs) { + ptr_ = rhs.ptr_; + return *this; + } + + template + bool operator==(const ListIterator& other) const { + return ptr_ == other.ptr_; + } + + template + bool operator!=(const ListIterator& other) const { + return !(*this == other); + } + + auto& operator*() const { + return static_cast(*ptr_); + } + + ListIterator& operator++() { + TORCH_CHECK(ptr_); + ptr_ = ptr_->next_; + return *this; + } + + ListIterator& operator--() { + TORCH_CHECK(ptr_); + ptr_ = ptr_->prev_; + return *this; + } + + auto* operator->() const { + return static_cast(ptr_); + } +}; + +template +class IntrusiveList { + static_assert(std::is_base_of_v); + + public: + IntrusiveList() = default; + IntrusiveList(const std::initializer_list>& items) { + for (auto& item : items) { + insert(this->end(), item); + } + } + ~IntrusiveList() { + while (head_.is_linked()) { + head_.next_->unlink(); + } + } + IntrusiveList(const IntrusiveList&) = delete; + IntrusiveList& operator=(const IntrusiveList&) = delete; + IntrusiveList(IntrusiveList&&) = delete; + IntrusiveList& operator=(IntrusiveList&&) = delete; + + using iterator = ListIterator; + using const_iterator = ListIterator; + + auto begin() const { + return ++const_iterator{&head_}; + } + + auto begin() { + return ++iterator{&head_}; + } + + auto end() const { + return const_iterator{&head_}; + } + + auto end() { + return iterator{&head_}; + } + + auto rbegin() const { + return std::reverse_iterator{end()}; + } + + auto rbegin() { + return std::reverse_iterator{end()}; + } + + auto rend() const { + return std::reverse_iterator{begin()}; + } + + auto rend() { + return std::reverse_iterator{begin()}; + } + + auto iterator_to(const T& n) const { + return const_iterator{&n}; + } + + auto iterator_to(T& n) { + return iterator{&n}; + } + + iterator insert(iterator pos, T& n) { + n.link_before(pos.ptr_); + return iterator{&n}; + } + + size_t size() const { + size_t ret = 0; + for ([[maybe_unused]] auto& _ : *this) { + ret++; + } + return ret; + } + + bool empty() const { + return !head_.is_linked(); + } + + private: + IntrusiveListHook head_; +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Lazy.h b/phivenv/Lib/site-packages/torch/include/c10/util/Lazy.h new file mode 100644 index 0000000000000000000000000000000000000000..d75d621a15217455441273c2b252bd0b46a0e54a --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Lazy.h @@ -0,0 +1,120 @@ +#pragma once + +#include +#include + +namespace c10 { + +/** + * Thread-safe lazy value with opportunistic concurrency: on concurrent first + * access, the factory may be called by multiple threads, but only one result is + * stored and its reference returned to all the callers. + * + * Value is heap-allocated; this optimizes for the case in which the value is + * never actually computed. 
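+ *
+ * A usage sketch (illustrative; expensiveCompute() is a placeholder):
+ *
+ *   OptimisticLazy<std::string> cached;
+ *   const std::string& v = cached.ensure([] { return expensiveCompute(); });
+ *   // Concurrent first callers may each run the factory, but exactly one
+ *   // result is kept and all of them receive a reference to it.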
+ */ +template +class OptimisticLazy { + public: + OptimisticLazy() = default; + OptimisticLazy(const OptimisticLazy& other) { + if (T* value = other.value_.load(std::memory_order_acquire)) { + value_ = new T(*value); + } + } + OptimisticLazy(OptimisticLazy&& other) noexcept + : value_(other.value_.exchange(nullptr, std::memory_order_acq_rel)) {} + ~OptimisticLazy() { + reset(); + } + + template + T& ensure(const Factory& factory) { + if (T* value = value_.load(std::memory_order_acquire)) { + return *value; + } + T* value = new T(factory()); + T* old = nullptr; + if (!value_.compare_exchange_strong( + old, value, std::memory_order_release, std::memory_order_acquire)) { + delete value; + value = old; + } + return *value; + } + + // The following methods are not thread-safe: they should not be called + // concurrently with any other method. + + OptimisticLazy& operator=(const OptimisticLazy& other) { + *this = OptimisticLazy{other}; + return *this; + } + + OptimisticLazy& operator=(OptimisticLazy&& other) noexcept { + if (this != &other) { + reset(); + value_.store( + other.value_.exchange(nullptr, std::memory_order_acquire), + std::memory_order_release); + } + return *this; + } + + void reset() { + if (T* old = value_.load(std::memory_order_relaxed)) { + value_.store(nullptr, std::memory_order_relaxed); + delete old; + } + } + + private: + std::atomic value_{nullptr}; +}; + +/** + * Interface for a value that is computed on first access. + */ +template +class LazyValue { + public: + virtual ~LazyValue() = default; + + virtual const T& get() const = 0; +}; + +/** + * Convenience thread-safe LazyValue implementation with opportunistic + * concurrency. + */ +template +class OptimisticLazyValue : public LazyValue { + public: + const T& get() const override { + return value_.ensure([this] { return compute(); }); + } + + private: + virtual T compute() const = 0; + + mutable OptimisticLazy value_; +}; + +/** + * Convenience immutable (thus thread-safe) LazyValue implementation for cases + * in which the value is not actually lazy. 
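+ *
+ * Sketch (illustrative): handy when an API expects a LazyValue<T> but the
+ * value is already known up front:
+ *
+ *   std::unique_ptr<LazyValue<std::string>> v =
+ *       std::make_unique<PrecomputedLazyValue<std::string>>("known");
+ *   v->get(); // returns the stored value immediately, no computation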
+ */ +template +class PrecomputedLazyValue : public LazyValue { + public: + PrecomputedLazyValue(T value) : value_(std::move(value)) {} + + const T& get() const override { + return value_; + } + + private: + T value_; +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/LeftRight.h b/phivenv/Lib/site-packages/torch/include/c10/util/LeftRight.h new file mode 100644 index 0000000000000000000000000000000000000000..4d9c707c2fc94053f1f01210d4b8672c14366384 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/LeftRight.h @@ -0,0 +1,229 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace c10 { + +namespace detail { + +struct IncrementRAII final { + public: + explicit IncrementRAII(std::atomic* counter) : _counter(counter) { + _counter->fetch_add(1); + } + + ~IncrementRAII() { + _counter->fetch_sub(1); + } + IncrementRAII(IncrementRAII&&) = delete; + IncrementRAII& operator=(IncrementRAII&&) = delete; + + private: + std::atomic* _counter; + + C10_DISABLE_COPY_AND_ASSIGN(IncrementRAII); +}; + +} // namespace detail + +// LeftRight wait-free readers synchronization primitive +// https://hal.archives-ouvertes.fr/hal-01207881/document +// +// LeftRight is quite easy to use (it can make an arbitrary +// data structure permit wait-free reads), but it has some +// particular performance characteristics you should be aware +// of if you're deciding to use it: +// +// - Reads still incur an atomic write (this is how LeftRight +// keeps track of how long it needs to keep around the old +// data structure) +// +// - Writes get executed twice, to keep both the left and right +// versions up to date. So if your write is expensive or +// nondeterministic, this is also an inappropriate structure +// +// LeftRight is used fairly rarely in PyTorch's codebase. If you +// are still not sure if you need it or not, consult your local +// C++ expert. +// +template +class LeftRight final { + public: + template + explicit LeftRight(const Args&... args) + : _counters{{{0}, {0}}}, + _foregroundCounterIndex(0), + _foregroundDataIndex(0), + _data{{T{args...}, T{args...}}} {} + + // Copying and moving would not be threadsafe. + // Needs more thought and careful design to make that work. + LeftRight(const LeftRight&) = delete; + LeftRight(LeftRight&&) noexcept = delete; + LeftRight& operator=(const LeftRight&) = delete; + LeftRight& operator=(LeftRight&&) noexcept = delete; + + ~LeftRight() { + // wait until any potentially running writers are finished + { + std::unique_lock lock(_writeMutex); + } + + // wait until any potentially running readers are finished + while (_counters[0].load() != 0 || _counters[1].load() != 0) { + std::this_thread::yield(); + } + } + + template + auto read(F&& readFunc) const { + detail::IncrementRAII _increment_counter( + &_counters[_foregroundCounterIndex.load()]); + + return std::forward(readFunc)(_data[_foregroundDataIndex.load()]); + } + + // Throwing an exception in writeFunc is ok but causes the state to be either + // the old or the new state, depending on if the first or the second call to + // writeFunc threw. + template + auto write(F&& writeFunc) { + std::unique_lock lock(_writeMutex); + + return _write(std::forward(writeFunc)); + } + + private: + template + auto _write(const F& writeFunc) { + /* + * Assume, A is in background and B in foreground. In simplified terms, we + * want to do the following: + * 1. Write to A (old background) + * 2. Switch A/B + * 3. 
Write to B (new background) + * + * More detailed algorithm (explanations on why this is important are below + * in code): + * 1. Write to A + * 2. Switch A/B data pointers + * 3. Wait until A counter is zero + * 4. Switch A/B counters + * 5. Wait until B counter is zero + * 6. Write to B + */ + + auto localDataIndex = _foregroundDataIndex.load(); + + // 1. Write to A + _callWriteFuncOnBackgroundInstance(writeFunc, localDataIndex); + + // 2. Switch A/B data pointers + localDataIndex = localDataIndex ^ 1; + _foregroundDataIndex = localDataIndex; + + /* + * 3. Wait until A counter is zero + * + * In the previous write run, A was foreground and B was background. + * There was a time after switching _foregroundDataIndex (B to foreground) + * and before switching _foregroundCounterIndex, in which new readers could + * have read B but incremented A's counter. + * + * In this current run, we just switched _foregroundDataIndex (A back to + * foreground), but before writing to the new background B, we have to make + * sure A's counter was zero briefly, so all these old readers are gone. + */ + auto localCounterIndex = _foregroundCounterIndex.load(); + _waitForBackgroundCounterToBeZero(localCounterIndex); + + /* + * 4. Switch A/B counters + * + * Now that we know all readers on B are really gone, we can switch the + * counters and have new readers increment A's counter again, which is the + * correct counter since they're reading A. + */ + localCounterIndex = localCounterIndex ^ 1; + _foregroundCounterIndex = localCounterIndex; + + /* + * 5. Wait until B counter is zero + * + * This waits for all the readers on B that came in while both data and + * counter for B was in foreground, i.e. normal readers that happened + * outside of that brief gap between switching data and counter. + */ + _waitForBackgroundCounterToBeZero(localCounterIndex); + + // 6. Write to B + return _callWriteFuncOnBackgroundInstance(writeFunc, localDataIndex); + } + + template + auto _callWriteFuncOnBackgroundInstance( + const F& writeFunc, + uint8_t localDataIndex) { + try { + return writeFunc(_data[localDataIndex ^ 1]); + } catch (...) { + // recover invariant by copying from the foreground instance + _data[localDataIndex ^ 1] = _data[localDataIndex]; + // rethrow + throw; + } + } + + void _waitForBackgroundCounterToBeZero(uint8_t counterIndex) { + while (_counters[counterIndex ^ 1].load() != 0) { + std::this_thread::yield(); + } + } + + mutable std::array, 2> _counters; + std::atomic _foregroundCounterIndex; + std::atomic _foregroundDataIndex; + std::array _data; + std::mutex _writeMutex; +}; + +// RWSafeLeftRightWrapper is API compatible with LeftRight and uses a +// read-write lock to protect T (data). +template +class RWSafeLeftRightWrapper final { + public: + template + explicit RWSafeLeftRightWrapper(const Args&... args) : data_{args...} {} + + // RWSafeLeftRightWrapper is not copyable or moveable since LeftRight + // is not copyable or moveable. 
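+  //
+  // Usage sketch (editor's illustration; same read/write shape as LeftRight):
+  //
+  //   RWSafeLeftRightWrapper<std::vector<int>> w;
+  //   w.write([](std::vector<int>& v) { v.push_back(1); });
+  //   int last = w.read([](const std::vector<int>& v) { return v.back(); });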
+ RWSafeLeftRightWrapper(const RWSafeLeftRightWrapper&) = delete; + RWSafeLeftRightWrapper(RWSafeLeftRightWrapper&&) noexcept = delete; + RWSafeLeftRightWrapper& operator=(const RWSafeLeftRightWrapper&) = delete; + RWSafeLeftRightWrapper& operator=(RWSafeLeftRightWrapper&&) noexcept = delete; + ~RWSafeLeftRightWrapper() = default; + + template + // NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward) + auto read(F&& readFunc) const { + return data_.withLock( + [&readFunc](T const& data) { return std::forward(readFunc)(data); }); + } + + template + // NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward) + auto write(F&& writeFunc) { + return data_.withLock( + [&writeFunc](T& data) { return std::forward(writeFunc)(data); }); + } + + private: + c10::Synchronized data_; +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Load.h b/phivenv/Lib/site-packages/torch/include/c10/util/Load.h new file mode 100644 index 0000000000000000000000000000000000000000..a2c9f97026a628fb3a221d57bb85fc41ca7f832a --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Load.h @@ -0,0 +1,38 @@ +#pragma once +#include +#include + +namespace c10 { +namespace detail { + +template +struct LoadImpl { + C10_HOST_DEVICE static T apply(const void* src) { + return *reinterpret_cast(src); + } +}; + +template <> +struct LoadImpl { + C10_HOST_DEVICE static bool apply(const void* src) { + static_assert(sizeof(bool) == sizeof(char)); + // NOTE: [Loading boolean values] + // Protect against invalid boolean values by loading as a byte + // first, then converting to bool (see gh-54789). + return *reinterpret_cast(src); + } +}; + +} // namespace detail + +template +C10_HOST_DEVICE constexpr T load(const void* src) { + return c10::detail::LoadImpl::apply(src); +} + +template +C10_HOST_DEVICE constexpr scalar_t load(const scalar_t* src) { + return c10::detail::LoadImpl::apply(src); +} + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Logging.h b/phivenv/Lib/site-packages/torch/include/c10/util/Logging.h new file mode 100644 index 0000000000000000000000000000000000000000..621fc1adfea61243cbff9279ec66fee23c75cab6 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Logging.h @@ -0,0 +1,372 @@ +#ifndef C10_UTIL_LOGGING_H_ +#define C10_UTIL_LOGGING_H_ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +// CAFFE2_LOG_THRESHOLD is a compile time flag that would allow us to turn off +// logging at compile time so no logging message below that level is produced +// at all. The value should be between INT_MIN and CAFFE_FATAL. +#ifndef CAFFE2_LOG_THRESHOLD +// If we have not defined the compile time log threshold, we keep all the +// log cases. +#define CAFFE2_LOG_THRESHOLD INT_MIN +#endif // CAFFE2_LOG_THRESHOLD + +// Below are different implementations for glog and non-glog cases. +#ifdef C10_USE_GLOG +#include +#else // !C10_USE_GLOG +#include +#endif // C10_USE_GLOG + +C10_DECLARE_int(caffe2_log_level); +C10_DECLARE_bool(caffe2_use_fatal_for_enforce); + +// Some versions of GLOG support less-spammy version of LOG_EVERY_MS. If it's +// not available - just short-circuit to the always working one one. 
+// We define the C10_ name to avoid confusing other files +#ifdef LOG_EVERY_MS +#define C10_LOG_EVERY_MS(severity, ms) LOG_EVERY_MS(severity, ms) +#else +#define C10_LOG_EVERY_MS(severity, ms) LOG(severity) +#endif + +// Same for LOG_FIRST_N +#ifdef LOG_FIRST_N +#define C10_LOG_FIRST_N(severity, n) LOG_FIRST_N(severity, n) +#else +#define C10_LOG_FIRST_N(severity, n) LOG(severity) +#endif + +// Same for LOG_EVERY_N +#ifdef LOG_EVERY_N +#define C10_LOG_EVERY_N(severity, n) LOG_EVERY_N(severity, n) +#else +#define C10_LOG_EVERY_N(severity, n) LOG(severity) +#endif + +namespace c10 { + +#if !defined(C10_NODEPRECATED) +using std::string; +#endif + +// Functions that we use for initialization. +C10_API bool InitCaffeLogging(int* argc, char** argv); +C10_API void UpdateLoggingLevelsFromFlags(); + +[[noreturn]] C10_API void ThrowEnforceNotMet( + const char* file, + const int line, + const char* condition, + const std::string& msg, + const void* caller = nullptr); + +[[noreturn]] C10_API void ThrowEnforceNotMet( + const char* file, + const int line, + const char* condition, + const char* msg, + const void* caller = nullptr); + +[[noreturn]] C10_API inline void ThrowEnforceNotMet( + const char* file, + const int line, + const char* condition, + detail::CompileTimeEmptyString /*msg*/, + const void* caller = nullptr) { + ThrowEnforceNotMet(file, line, condition, "", caller); +} + +[[noreturn]] C10_API void ThrowEnforceFiniteNotMet( + const char* file, + const int line, + const char* condition, + const std::string& msg, + const void* caller = nullptr); + +[[noreturn]] C10_API void ThrowEnforceFiniteNotMet( + const char* file, + const int line, + const char* condition, + const char* msg, + const void* caller = nullptr); + +[[noreturn]] C10_API inline void ThrowEnforceFiniteNotMet( + const char* file, + const int line, + const char* condition, + detail::CompileTimeEmptyString /*msg*/, + const void* caller = nullptr) { + ThrowEnforceFiniteNotMet(file, line, condition, "", caller); +} + +constexpr bool IsUsingGoogleLogging() { +#ifdef C10_USE_GLOG + return true; +#else + return false; +#endif +} + +/** + * A utility to allow one to show log info to stderr after the program starts. + * + * This is similar to calling GLOG's --logtostderr, or setting caffe2_log_level + * to smaller than INFO. You are recommended to only use this in a few sparse + * cases, such as when you want to write a tutorial or something. Normally, use + * the commandline flags to set the log level. + */ +C10_API void ShowLogInfoToStderr(); + +C10_API void SetStackTraceFetcher(std::function<::c10::Backtrace()> fetcher); + +/** + * Convenience function for non-lazy stack trace fetchers. The Backtrace + * overload should be preferred when stringifying the backtrace is expensive. + */ +C10_API void SetStackTraceFetcher(std::function fetcher); + +using EnforceNotMet = ::c10::Error; + +#define CAFFE_ENFORCE(condition, ...) \ + do { \ + if (C10_UNLIKELY(!(condition))) { \ + ::c10::ThrowEnforceNotMet( \ + __FILE__, __LINE__, #condition, ::c10::str(__VA_ARGS__)); \ + } \ + } while (false) + +#define CAFFE_ENFORCE_FINITE(condition, ...) \ + do { \ + if (C10_UNLIKELY(!(condition))) { \ + ::c10::ThrowEnforceFiniteNotMet( \ + __FILE__, __LINE__, #condition, ::c10::str(__VA_ARGS__)); \ + } \ + } while (false) + +#define CAFFE_ENFORCE_WITH_CALLER(condition, ...) 
\ + do { \ + if (C10_UNLIKELY(!(condition))) { \ + ::c10::ThrowEnforceNotMet( \ + __FILE__, __LINE__, #condition, ::c10::str(__VA_ARGS__), this); \ + } \ + } while (false) + +#define CAFFE_THROW(...) \ + ::c10::ThrowEnforceNotMet(__FILE__, __LINE__, "", ::c10::str(__VA_ARGS__)) + +/** + * Rich logging messages + * + * CAFFE_ENFORCE_THAT can be used with one of the "checker functions" that + * capture input argument values and add it to the exception message. E.g. + * `CAFFE_ENFORCE_THAT(Equals(foo(x), bar(y)), "Optional additional message")` + * would evaluate both foo and bar only once and if the results are not equal - + * include them in the exception message. + * + * Some of the basic checker functions like Equals or Greater are already + * defined below. Other header might define customized checkers by adding + * functions to caffe2::enforce_detail namespace. For example: + * + * namespace caffe2 { namespace enforce_detail { + * inline EnforceFailMessage IsVector(const vector& shape) { + * if (shape.size() == 1) { return EnforceOK(); } + * return c10::str("Shape ", shape, " is not a vector"); + * } + * }} + * + * With further usages like `CAFFE_ENFORCE_THAT(IsVector(Input(0).dims()))` + * + * Convenient wrappers for binary operations like CAFFE_ENFORCE_EQ are provided + * too. Please use them instead of TORCH_CHECK_EQ and friends for failures in + * user-provided input. + */ + +namespace enforce_detail { + +template +std::string enforceFailMsgImpl(const T1& x, const T2& y) { + return c10::str(x, " vs ", y); +} + +template +std::string enforceFailMsgImpl(const T1& x, const T2& y, const Args&... args) { + return c10::str(x, " vs ", y, ". ", args...); +} + +template +void enforceThatImpl( + Pred p, + const T1& lhs, + const T2& rhs, + const char* file, + int line, + const char* expr, + const void* caller, + GetFailMsgFunc getFailMsg) { + if (C10_UNLIKELY(!(p(lhs, rhs)))) { + ::c10::ThrowEnforceNotMet(file, line, expr, getFailMsg(lhs, rhs), caller); + } +} + +#define CAFFE_ENFORCE_THAT_IMPL(op, lhs, rhs, expr, ...) \ + ::c10::enforce_detail::enforceThatImpl( \ + op, \ + (lhs), \ + (rhs), \ + __FILE__, \ + __LINE__, \ + expr, \ + nullptr, \ + [&](const auto& arg1, const auto& arg2) { \ + return ::c10::enforce_detail::enforceFailMsgImpl( \ + arg1, arg2, ##__VA_ARGS__); \ + }) + +#define CAFFE_ENFORCE_THAT_IMPL_WITH_CALLER(op, lhs, rhs, expr, ...) \ + ::c10::enforce_detail::enforceThatImpl( \ + op, \ + (lhs), \ + (rhs), \ + __FILE__, \ + __LINE__, \ + expr, \ + this, \ + [&](const auto& arg1, const auto& arg2) { \ + return ::c10::enforce_detail::enforceFailMsgImpl( \ + arg1, arg2, ##__VA_ARGS__); \ + }) + +} // namespace enforce_detail + +#define CAFFE_ENFORCE_THAT(cmp, op, lhs, rhs, ...) \ + CAFFE_ENFORCE_THAT_IMPL(cmp, lhs, rhs, #lhs " " #op " " #rhs, ##__VA_ARGS__) + +#define CAFFE_ENFORCE_BINARY_OP(cmp, op, x, y, ...) \ + CAFFE_ENFORCE_THAT_IMPL(cmp, x, y, #x " " #op " " #y, ##__VA_ARGS__) +#define CAFFE_ENFORCE_EQ(x, y, ...) \ + CAFFE_ENFORCE_BINARY_OP(std::equal_to(), ==, x, y, ##__VA_ARGS__) +#define CAFFE_ENFORCE_NE(x, y, ...) \ + CAFFE_ENFORCE_BINARY_OP(std::not_equal_to(), !=, x, y, ##__VA_ARGS__) +#define CAFFE_ENFORCE_LE(x, y, ...) \ + CAFFE_ENFORCE_BINARY_OP(std::less_equal(), <=, x, y, ##__VA_ARGS__) +#define CAFFE_ENFORCE_LT(x, y, ...) \ + CAFFE_ENFORCE_BINARY_OP(std::less(), <, x, y, ##__VA_ARGS__) +#define CAFFE_ENFORCE_GE(x, y, ...) \ + CAFFE_ENFORCE_BINARY_OP(std::greater_equal(), >=, x, y, ##__VA_ARGS__) +#define CAFFE_ENFORCE_GT(x, y, ...) 
\ + CAFFE_ENFORCE_BINARY_OP(std::greater(), >, x, y, ##__VA_ARGS__) + +#define CAFFE_ENFORCE_BINARY_OP_WITH_CALLER(cmp, op, x, y, ...) \ + CAFFE_ENFORCE_THAT_IMPL_WITH_CALLER( \ + cmp, x, y, #x " " #op " " #y, ##__VA_ARGS__) +#define CAFFE_ENFORCE_EQ_WITH_CALLER(x, y, ...) \ + CAFFE_ENFORCE_BINARY_OP_WITH_CALLER( \ + std::equal_to(), ==, x, y, ##__VA_ARGS__) +#define CAFFE_ENFORCE_NE_WITH_CALLER(x, y, ...) \ + CAFFE_ENFORCE_BINARY_OP_WITH_CALLER( \ + std::not_equal_to(), !=, x, y, ##__VA_ARGS__) +#define CAFFE_ENFORCE_LE_WITH_CALLER(x, y, ...) \ + CAFFE_ENFORCE_BINARY_OP_WITH_CALLER( \ + std::less_equal(), <=, x, y, ##__VA_ARGS__) +#define CAFFE_ENFORCE_LT_WITH_CALLER(x, y, ...) \ + CAFFE_ENFORCE_BINARY_OP_WITH_CALLER(std::less(), <, x, y, ##__VA_ARGS__) +#define CAFFE_ENFORCE_GE_WITH_CALLER(x, y, ...) \ + CAFFE_ENFORCE_BINARY_OP_WITH_CALLER( \ + std::greater_equal(), >=, x, y, ##__VA_ARGS__) +#define CAFFE_ENFORCE_GT_WITH_CALLER(x, y, ...) \ + CAFFE_ENFORCE_BINARY_OP_WITH_CALLER( \ + std::greater(), >, x, y, ##__VA_ARGS__) + +struct IValue; +class C10_API EventSampledHandler { + public: + virtual void log( + std::string_view model_id, + const std::vector& args) = 0; + virtual ~EventSampledHandler() = default; +}; + +#define C10_LOG_EVENT_SAMPLED(event, ...) \ + static const std::unique_ptr<::c10::EventSampledHandler>& \ + _##event##EventSampledHandler = ::c10::GetEventSampledHandler(#event); \ + if (_##event##EventSampledHandler) { \ + _##event##EventSampledHandler->log(__VA_ARGS__); \ + } + +// Must be called in the main thread before any other threads are spawned. +C10_API void InitEventSampledHandlers( + std::vector< + std::pair>>); +C10_API const std::unique_ptr& GetEventSampledHandler( + std::string_view); + +/** + * Very lightweight logging for the first time API usage. It's beneficial for + * tracking of individual functionality usage in larger applications. + * + * In order to ensure light-weightedness of logging, we utilize static variable + * trick - LogAPIUsage will be invoked only once and further invocations will + * just do an atomic check. + * + * Example: + * // Logs caller info with an arbitrary text event, if there is a usage. + * C10_LOG_API_USAGE_ONCE("my_api"); + */ +#define C10_LOG_API_USAGE_ONCE(...) \ + [[maybe_unused]] static bool C10_ANONYMOUS_VARIABLE(logFlag) = \ + ::c10::detail::LogAPIUsageFakeReturn(__VA_ARGS__); + +// API usage logging capabilities +C10_API void SetAPIUsageLogger(std::function logger); +C10_API void LogAPIUsage(const std::string& context); + +C10_API void SetAPIUsageMetadataLogger( + std::function& metadata_map)> logger); +C10_API void LogAPIUsageMetadata( + const std::string& context, + const std::map& metadata_map); + +// PyTorch ddp usage logging capabilities +// DDPLoggingData holds data that can be logged in applications +// for analysis and debugging. Data structure is defined in +// c10 directory so that it can be easily imported by both c10 +// and torch files. +struct DDPLoggingData { + // logging fields that are string types. + std::map strs_map; + // logging fields that are int64_t types. + std::map ints_map; +}; + +C10_API void SetPyTorchDDPUsageLogger( + std::function logger); +C10_API void LogPyTorchDDPUsage(const DDPLoggingData& ddpData); + +namespace detail { +// Return value is needed to do the static variable initialization trick +C10_API bool LogAPIUsageFakeReturn(const std::string& context); +} // namespace detail + +// Initializes the c10 logger. 
+C10_API void initLogging(); + +// Sets the rank, which will be included in log messages +C10_API void SetGlobalRank(int64_t rank); + +} // namespace c10 + +#endif // C10_UTIL_LOGGING_H_ diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/MathConstants.h b/phivenv/Lib/site-packages/torch/include/c10/util/MathConstants.h new file mode 100644 index 0000000000000000000000000000000000000000..ecdf9f34a945b6acb73614a35c637e80da4a6fba --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/MathConstants.h @@ -0,0 +1,142 @@ +#pragma once + +#include +#include +#include + +C10_CLANG_DIAGNOSTIC_PUSH() +#if C10_CLANG_HAS_WARNING("-Wimplicit-float-conversion") +C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-float-conversion") +#endif + +namespace c10 { +// TODO: Replace me with inline constexpr variable when C++17 becomes available +namespace detail { +template +C10_HOST_DEVICE inline constexpr T e() { + return static_cast(2.718281828459045235360287471352662); +} + +template +C10_HOST_DEVICE inline constexpr T euler() { + return static_cast(0.577215664901532860606512090082402); +} + +template +C10_HOST_DEVICE inline constexpr T frac_1_pi() { + return static_cast(0.318309886183790671537767526745028); +} + +template +C10_HOST_DEVICE inline constexpr T frac_1_sqrt_pi() { + return static_cast(0.564189583547756286948079451560772); +} + +template +C10_HOST_DEVICE inline constexpr T frac_sqrt_2() { + return static_cast(0.707106781186547524400844362104849); +} + +template +C10_HOST_DEVICE inline constexpr T frac_sqrt_3() { + return static_cast(0.577350269189625764509148780501957); +} + +template +C10_HOST_DEVICE inline constexpr T golden_ratio() { + return static_cast(1.618033988749894848204586834365638); +} + +template +C10_HOST_DEVICE inline constexpr T ln_10() { + return static_cast(2.302585092994045684017991454684364); +} + +template +C10_HOST_DEVICE inline constexpr T ln_2() { + return static_cast(0.693147180559945309417232121458176); +} + +template +C10_HOST_DEVICE inline constexpr T log_10_e() { + return static_cast(0.434294481903251827651128918916605); +} + +template +C10_HOST_DEVICE inline constexpr T log_2_e() { + return static_cast(1.442695040888963407359924681001892); +} + +template +C10_HOST_DEVICE inline constexpr T pi() { + return static_cast(3.141592653589793238462643383279502); +} + +template +C10_HOST_DEVICE inline constexpr T sqrt_2() { + return static_cast(1.414213562373095048801688724209698); +} + +template +C10_HOST_DEVICE inline constexpr T sqrt_3() { + return static_cast(1.732050807568877293527446341505872); +} + +template <> +C10_HOST_DEVICE inline constexpr BFloat16 pi() { + // According to + // https://en.wikipedia.org/wiki/Bfloat16_floating-point_format#Special_values + // pi is encoded as 4049 + return BFloat16(0x4049, BFloat16::from_bits()); +} + +template <> +C10_HOST_DEVICE inline constexpr Half pi() { + return Half(0x4248, Half::from_bits()); +} +} // namespace detail + +template +constexpr T e = c10::detail::e(); + +template +constexpr T euler = c10::detail::euler(); + +template +constexpr T frac_1_pi = c10::detail::frac_1_pi(); + +template +constexpr T frac_1_sqrt_pi = c10::detail::frac_1_sqrt_pi(); + +template +constexpr T frac_sqrt_2 = c10::detail::frac_sqrt_2(); + +template +constexpr T frac_sqrt_3 = c10::detail::frac_sqrt_3(); + +template +constexpr T golden_ratio = c10::detail::golden_ratio(); + +template +constexpr T ln_10 = c10::detail::ln_10(); + +template +constexpr T ln_2 = c10::detail::ln_2(); + +template +constexpr T log_10_e = 
c10::detail::log_10_e(); + +template +constexpr T log_2_e = c10::detail::log_2_e(); + +template +constexpr T pi = c10::detail::pi(); + +template +constexpr T sqrt_2 = c10::detail::sqrt_2(); + +template +constexpr T sqrt_3 = c10::detail::sqrt_3(); +} // namespace c10 + +C10_CLANG_DIAGNOSTIC_POP() diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/MaybeOwned.h b/phivenv/Lib/site-packages/torch/include/c10/util/MaybeOwned.h new file mode 100644 index 0000000000000000000000000000000000000000..074ae1070adb2a5699561bab078c5973e013ac38 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/MaybeOwned.h @@ -0,0 +1,237 @@ +#pragma once + +#include +#include + +#include +#include +#include + +namespace c10 { + +/// MaybeOwnedTraits describes how to borrow from T. Here is how we +/// can implement borrowing from an arbitrary type T using a raw +/// pointer to const: +template +struct MaybeOwnedTraitsGenericImpl { + using owned_type = T; + using borrow_type = const T*; + + static borrow_type createBorrow(const owned_type& from) { + return &from; + } + + static void assignBorrow(borrow_type& lhs, borrow_type rhs) { + lhs = rhs; + } + + static void destroyBorrow(borrow_type& /*toDestroy*/) {} + + static const owned_type& referenceFromBorrow(const borrow_type& borrow) { + return *borrow; + } + + static const owned_type* pointerFromBorrow(const borrow_type& borrow) { + return borrow; + } + + static bool debugBorrowIsValid(const borrow_type& borrow) { + return borrow != nullptr; + } +}; + +/// It is possible to eliminate the extra layer of indirection for +/// borrows for some types that we control. For examples, see +/// intrusive_ptr.h and TensorBody.h. + +template +struct MaybeOwnedTraits; + +// Explicitly enable MaybeOwned>, rather than allowing +// MaybeOwned to be used for any type right away. +template +struct MaybeOwnedTraits> + : public MaybeOwnedTraitsGenericImpl> {}; + +/// A smart pointer around either a borrowed or owned T. When +/// constructed with borrowed(), the caller MUST ensure that the +/// borrowed-from argument outlives this MaybeOwned. Compare to +/// Rust's std::borrow::Cow +/// (https://doc.rust-lang.org/std/borrow/enum.Cow.html), but note +/// that it is probably not suitable for general use because C++ has +/// no borrow checking. Included here to support +/// Tensor::expect_contiguous. +template +class MaybeOwned final { + using borrow_type = typename MaybeOwnedTraits::borrow_type; + using owned_type = typename MaybeOwnedTraits::owned_type; + + bool isBorrowed_; + union { + borrow_type borrow_; + owned_type own_; + }; + + /// Don't use this; use borrowed() instead. + explicit MaybeOwned(const owned_type& t) + : isBorrowed_(true), borrow_(MaybeOwnedTraits::createBorrow(t)) {} + + /// Don't use this; use owned() instead. + explicit MaybeOwned(T&& t) noexcept(std::is_nothrow_move_constructible_v) + : isBorrowed_(false), own_(std::move(t)) {} + + /// Don't use this; use owned() instead. + template + explicit MaybeOwned(std::in_place_t, Args&&... args) + : isBorrowed_(false), own_(std::forward(args)...) {} + + public: + explicit MaybeOwned() : isBorrowed_(true), borrow_() {} + + // Copying a borrow yields another borrow of the original, as with a + // T*. Copying an owned T yields another owned T for safety: no + // chains of borrowing by default! (Note you could get that behavior + // with MaybeOwned::borrowed(*rhs) if you wanted it.) 
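+  //
+  // Sketch of the resulting semantics (illustrative only):
+  //
+  //   auto b = MaybeOwned<T>::borrowed(t);   // caller must keep t alive
+  //   auto b2 = b;                           // another borrow of t
+  //   auto o = MaybeOwned<T>::owned(std::move(t));
+  //   auto o2 = o;                           // o2 owns an independent copy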
+ MaybeOwned(const MaybeOwned& rhs) : isBorrowed_(rhs.isBorrowed_) { + if (C10_LIKELY(rhs.isBorrowed_)) { + MaybeOwnedTraits::assignBorrow(borrow_, rhs.borrow_); + } else { + new (&own_) T(rhs.own_); + } + } + + MaybeOwned& operator=(const MaybeOwned& rhs) { + if (this == &rhs) { + return *this; + } + if (C10_UNLIKELY(!isBorrowed_)) { + if (rhs.isBorrowed_) { + own_.~T(); + MaybeOwnedTraits::assignBorrow(borrow_, rhs.borrow_); + isBorrowed_ = true; + } else { + own_ = rhs.own_; + } + } else { + if (C10_LIKELY(rhs.isBorrowed_)) { + MaybeOwnedTraits::assignBorrow(borrow_, rhs.borrow_); + } else { + MaybeOwnedTraits::destroyBorrow(borrow_); + new (&own_) T(rhs.own_); + isBorrowed_ = false; + } + } + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(isBorrowed_ == rhs.isBorrowed_); + return *this; + } + + MaybeOwned(MaybeOwned&& rhs) noexcept( + // NOLINTNEXTLINE(*-noexcept-move-*) + std::is_nothrow_move_constructible_v && + std::is_nothrow_move_assignable_v) + : isBorrowed_(rhs.isBorrowed_) { + if (C10_LIKELY(rhs.isBorrowed_)) { + MaybeOwnedTraits::assignBorrow(borrow_, rhs.borrow_); + } else { + new (&own_) T(std::move(rhs.own_)); + } + } + + MaybeOwned& operator=(MaybeOwned&& rhs) noexcept( + std::is_nothrow_move_assignable_v && + std::is_nothrow_move_assignable_v && + std::is_nothrow_move_constructible_v && + // NOLINTNEXTLINE(*-noexcept-move-*) + std::is_nothrow_destructible_v && + std::is_nothrow_destructible_v) { + if (this == &rhs) { + return *this; + } + if (C10_UNLIKELY(!isBorrowed_)) { + if (rhs.isBorrowed_) { + own_.~T(); + MaybeOwnedTraits::assignBorrow(borrow_, rhs.borrow_); + isBorrowed_ = true; + } else { + own_ = std::move(rhs.own_); + } + } else { + if (C10_LIKELY(rhs.isBorrowed_)) { + MaybeOwnedTraits::assignBorrow(borrow_, rhs.borrow_); + } else { + MaybeOwnedTraits::destroyBorrow(borrow_); + new (&own_) T(std::move(rhs.own_)); + isBorrowed_ = false; + } + } + return *this; + } + + static MaybeOwned borrowed(const T& t) { + return MaybeOwned(t); + } + + static MaybeOwned owned(T&& t) noexcept( + std::is_nothrow_move_constructible_v) { + return MaybeOwned(std::move(t)); + } + + template + static MaybeOwned owned(std::in_place_t, Args&&... args) { + return MaybeOwned(std::in_place, std::forward(args)...); + } + + ~MaybeOwned() noexcept( + // NOLINTNEXTLINE(*-noexcept-destructor) + std::is_nothrow_destructible_v && + std::is_nothrow_destructible_v) { + if (C10_UNLIKELY(!isBorrowed_)) { + own_.~T(); + } else { + MaybeOwnedTraits::destroyBorrow(borrow_); + } + } + + // This is an implementation detail! You should know what you're doing + // if you are testing this. If you just want to guarantee ownership move + // this into a T + bool unsafeIsBorrowed() const { + return isBorrowed_; + } + + const T& operator*() const& { + if (isBorrowed_) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + MaybeOwnedTraits::debugBorrowIsValid(borrow_)); + } + return C10_LIKELY(isBorrowed_) + ? MaybeOwnedTraits::referenceFromBorrow(borrow_) + : own_; + } + + const T* operator->() const { + if (isBorrowed_) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + MaybeOwnedTraits::debugBorrowIsValid(borrow_)); + } + return C10_LIKELY(isBorrowed_) + ? MaybeOwnedTraits::pointerFromBorrow(borrow_) + : &own_; + } + + // If borrowed, copy the underlying T. If owned, move from + // it. borrowed/owned state remains the same, and either we + // reference the same borrow as before or we are an owned moved-from + // T. 
+ T operator*() && { + if (isBorrowed_) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + MaybeOwnedTraits::debugBorrowIsValid(borrow_)); + return MaybeOwnedTraits::referenceFromBorrow(borrow_); + } else { + return std::move(own_); + } + } +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Metaprogramming.h b/phivenv/Lib/site-packages/torch/include/c10/util/Metaprogramming.h new file mode 100644 index 0000000000000000000000000000000000000000..0e47b356cd6cdcc3c7d4bc5811553fa371b2be5e --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Metaprogramming.h @@ -0,0 +1,224 @@ +#pragma once + +#include +#include + +namespace c10::guts { + +/** + * Access information about result type or arguments from a function type. + * Example: + * using A = function_traits::return_type // A == int + * using A = function_traits::parameter_types::tuple_type + * // A == tuple + */ +template +struct function_traits { + static_assert( + !std::is_same_v, + "In function_traits, Func must be a plain function type."); +}; +template +struct function_traits { + using func_type = Result(Args...); + using return_type = Result; + using parameter_types = typelist::typelist; + static constexpr auto number_of_parameters = sizeof...(Args); +}; + +/** + * infer_function_traits: creates a `function_traits` type for a simple + * function (pointer) or functor (lambda/struct). Currently does not support + * class methods. + */ + +template +struct infer_function_traits { + using type = function_traits< + c10::guts::detail::strip_class_t>; +}; + +template +struct infer_function_traits { + using type = function_traits; +}; + +template +struct infer_function_traits { + using type = function_traits; +}; + +template +using infer_function_traits_t = typename infer_function_traits::type; + +/** + * make_function_traits: creates a `function_traits` type given a Return type + * and a typelist of Argument types + * + * Example: + * bool f(int, int); + * + * infer_function_traits_t == make_function_traits_t> + */ +template +struct make_function_traits { + static_assert( + false_t::value, + "In guts::make_function_traits, the ArgList argument must be typelist<...>."); +}; + +template +struct make_function_traits> { + using type = function_traits; +}; + +template +using make_function_traits_t = + typename make_function_traits::type; + +/** + * make_offset_index_sequence + * Like make_index_sequence, but starting from Start instead of 0. + * + * Example: + * make_offset_index_sequence<10, 3> == std::index_sequence<10, 11, 12> + */ +template +struct make_offset_index_sequence_impl + : make_offset_index_sequence_impl { + static_assert( + static_cast(Start) >= 0, + "make_offset_index_sequence: Start < 0"); + static_assert(static_cast(N) >= 0, "make_offset_index_sequence: N < 0"); +}; + +template +struct make_offset_index_sequence_impl { + typedef std::index_sequence type; +}; + +template +using make_offset_index_sequence = + typename make_offset_index_sequence_impl::type; + +/** + * Use tuple_elements to extract a position-indexed subset of elements + * from the argument tuple into a result tuple. + * + * Example: + * std::tuple t = std::make_tuple(0, "HEY", 2.0); + * std::tuple result = tuple_elements(t, std::index_sequence<0, + * 2>()); + */ +template +constexpr auto tuple_elements(Tuple t, std::index_sequence) { + return std::tuple...>(std::get(t)...); +} + +/** + * Use tuple_take to extract the first or last n elements from the argument + * tuple into a result tuple. 
+ * + * Example: + * std::tuple t = std::make_tuple(0, "HEY", 2.0); + * std::tuple first_two = tuple_take(t); + * std::tuple last_two = tuple_take(t); + */ +template +struct TupleTake {}; + +template +struct TupleTake= 0, void>> { + static auto call(Tuple t) { + constexpr size_t size = std::tuple_size(); + static_assert(N <= size, "tuple_take: N > size"); + return tuple_elements(t, std::make_index_sequence{}); + } +}; + +template + struct TupleTake < Tuple, + N, std::enable_if_t> { + static auto call(Tuple t) { + constexpr size_t size = std::tuple_size(); + static_assert(-N <= size, "tuple_take: -N > size"); + return tuple_elements(t, make_offset_index_sequence{}); + } +}; + +template +auto tuple_take(Tuple t) { + return TupleTake::call(t); +} + +/** + * Use tuple_slice to extract a contiguous subtuple from the argument. + * + * Example: + * std::tuple t = std::make_tuple(0, + * "HEY", 2.0, false); std::tuple middle_two = + * tuple_slice(t); + */ +template +constexpr auto tuple_slice(Tuple t) { + constexpr size_t size = std::tuple_size(); + static_assert(Start + N <= size, "tuple_slice: Start + N > size"); + return tuple_elements(t, make_offset_index_sequence{}); +} + +/** + * Use tuple_map to run a mapping function over a tuple to get a new tuple. + * + * Example 1: + * auto result = tuple_map(std::tuple(3, 4, 5), [] + * (int32_t a) -> int16_t {return a+1;}); + * // result == std::tuple(4, 5, 6) + * + * Example 2: + * struct Mapper { + * std::string operator()(int32_t a) const { + * return std::to_string(a); + * } + * int64_t operator()(const std::string& a) const { + * return atoi(a.c_str()); + * } + * }; + * auto result = tuple_map(std::tuple(3, "4"), + * Mapper()); + * // result == std::tuple("3", 4) + * + * Example 3: + * struct A final { + * int32_t func() { + * return 5; + * } + * }; + * struct B final { + * std::string func() { + * return "5"; + * } + * }; + * auto result = tuple_map(std::make_tuple(A(), B()), [] (auto a) { return + * a.func(); }); + * // result == std::tuple(5, "5"); + */ +namespace detail { +template +auto tuple_map( + // NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved) + std::tuple&& tuple, + const Mapper& mapper, + std::index_sequence) { + return std::tuple(std::get( + tuple))))...>(mapper(std::forward(std::get(tuple)))...); +} +} // namespace detail + +template +auto tuple_map(std::tuple&& tuple, const Mapper& mapper) { + return detail::tuple_map( + std::move(tuple), mapper, std::index_sequence_for()); +} + +} // namespace c10::guts diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/NetworkFlow.h b/phivenv/Lib/site-packages/torch/include/c10/util/NetworkFlow.h new file mode 100644 index 0000000000000000000000000000000000000000..10bcd58f56c6efa5f7e624b1b5eeff1fceb410a0 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/NetworkFlow.h @@ -0,0 +1,54 @@ +#pragma once + +#include + +#include +#include + +/** + * This file provides a network flow implementation. + * https://en.wikipedia.org/wiki/Flow_network + * + * It aims to mirror some of the behavior of networkx, which is/was used by + * functorch partitioners for splitting the graph into a forward and backward + * graph. 
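+ *
+ * A hedged usage sketch (all names below are declared later in this header):
+ *
+ *   c10::NetworkFlowGraph g;
+ *   g.add_edge("s", "a", 3);   // capacity 3
+ *   g.add_edge("a", "t", 2);   // capacity 2
+ *   c10::MinCutResult r = g.minimum_cut("s", "t");
+ *   // on MinCutStatus::SUCCESS, r.max_flow holds the maximum flow and
+ *   // r.reachable / r.unreachable partition the nodes across the cut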
+ */ + +namespace c10 { + +enum class C10_API_ENUM MinCutStatus { + SUCCESS = 0, + UNBOUNDED = 1, + OVERFLOW_INF = 2, + INVALID = 3, +}; + +struct MinCutResult { + MinCutStatus status; + int64_t max_flow; + std::vector reachable; + std::vector unreachable; +}; + +// Modeled after networkx implementation +class C10_API NetworkFlowGraph { + public: + // selected such that INF + INF is < INT64_MAX + constexpr static int64_t INF = (1LL << 62) - 1; + + struct Edge { + std::string source, dest; + int64_t capacity; + }; + + MinCutStatus add_edge( + const std::string& source, + const std::string& dest, + int64_t capacity = 1); + + MinCutResult minimum_cut(const std::string& s, const std::string& t) const; + + std::vector edges; +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Optional.h b/phivenv/Lib/site-packages/torch/include/c10/util/Optional.h new file mode 100644 index 0000000000000000000000000000000000000000..93f7c06cd2af23a031246f0d5a2e04236a778b87 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Optional.h @@ -0,0 +1,60 @@ +#ifndef C10_UTIL_OPTIONAL_H_ +#define C10_UTIL_OPTIONAL_H_ + +#include +#include + +// Macros.h is not needed, but it does namespace shenanigans that lots +// of downstream code seems to rely on. Feel free to remove it and fix +// up builds. + +namespace c10 { + +#if !defined(FBCODE_CAFFE2) && !defined(C10_NODEPRECATED) +// NOLINTNEXTLINE(misc-unused-using-decls) +using std::bad_optional_access; +// NOLINTNEXTLINE(misc-unused-using-decls) +using std::make_optional; +// NOLINTNEXTLINE(misc-unused-using-decls) +using std::nullopt; +// NOLINTNEXTLINE(misc-unused-using-decls) +using std::nullopt_t; +// NOLINTNEXTLINE(misc-unused-using-decls) +using std::optional; +#endif + +#if !defined(FBCODE_CAFFE2) && !defined(C10_NODEPRECATED) + +namespace detail_ { +// the call to convert(b) has return type A and converts b to type A iff b +// decltype(b) is implicitly convertible to A +template +constexpr U convert(U v) { + return v; +} +} // namespace detail_ +template +[[deprecated( + "Please use std::optional::value_or instead of c10::value_or_else")]] constexpr T +value_or_else(const std::optional& v, F&& func) { + static_assert( + std::is_convertible_v, T>, + "func parameters must be a callable that returns a type convertible to the value stored in the optional"); + return v.has_value() ? *v : detail_::convert(std::forward(func)()); +} + +template +[[deprecated( + "Please use std::optional::value_or instead of c10::value_or_else")]] constexpr T +value_or_else(std::optional&& v, F&& func) { + static_assert( + std::is_convertible_v, T>, + "func parameters must be a callable that returns a type convertible to the value stored in the optional"); + return v.has_value() ? constexpr_move(std::move(v).contained_val()) + : detail_::convert(std::forward(func)()); +} + +#endif + +} // namespace c10 +#endif // C10_UTIL_OPTIONAL_H_ diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/OptionalArrayRef.h b/phivenv/Lib/site-packages/torch/include/c10/util/OptionalArrayRef.h new file mode 100644 index 0000000000000000000000000000000000000000..5fe569913c2bfa2cacda9c1e3e1ad9e56744250f --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/OptionalArrayRef.h @@ -0,0 +1,237 @@ +// This file defines OptionalArrayRef, a class that has almost the same +// exact functionality as std::optional>, except that its +// converting constructor fixes a dangling pointer issue. 
+// +// The implicit converting constructor of both std::optional> and +// std::optional> can cause the underlying ArrayRef to store +// a dangling pointer. OptionalArrayRef prevents this by wrapping +// a std::optional> and fixing the constructor implementation. +// +// See https://github.com/pytorch/pytorch/issues/63645 for more on this. + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace c10 { + +template +class OptionalArrayRef final { + public: + // Constructors + + constexpr OptionalArrayRef() noexcept = default; + + constexpr OptionalArrayRef(std::nullopt_t) noexcept {} + + OptionalArrayRef(const OptionalArrayRef& other) = default; + + OptionalArrayRef(OptionalArrayRef&& other) noexcept = default; + + constexpr OptionalArrayRef(const std::optional>& other) noexcept + : wrapped_opt_array_ref(other) {} + + constexpr OptionalArrayRef(std::optional>&& other) noexcept + : wrapped_opt_array_ref(std::move(other)) {} + + constexpr OptionalArrayRef(const T& value) noexcept + : wrapped_opt_array_ref(value) {} + + template < + typename U = ArrayRef, + std::enable_if_t< + !std::is_same_v, OptionalArrayRef> && + !std::is_same_v, std::in_place_t> && + std::is_constructible_v, U&&> && + std::is_convertible_v> && + !std::is_convertible_v, + bool> = false> + constexpr OptionalArrayRef(U&& value) noexcept( + std::is_nothrow_constructible_v, U&&>) + : wrapped_opt_array_ref(std::forward(value)) {} + + template < + typename U = ArrayRef, + std::enable_if_t< + !std::is_same_v, OptionalArrayRef> && + !std::is_same_v, std::in_place_t> && + std::is_constructible_v, U&&> && + !std::is_convertible_v>, + bool> = false> + constexpr explicit OptionalArrayRef(U&& value) noexcept( + std::is_nothrow_constructible_v, U&&>) + : wrapped_opt_array_ref(std::forward(value)) {} + + template + constexpr explicit OptionalArrayRef( + std::in_place_t ip, + Args&&... args) noexcept + : wrapped_opt_array_ref(ip, std::forward(args)...) {} + + template + constexpr explicit OptionalArrayRef( + std::in_place_t ip, + std::initializer_list il, + Args&&... args) + : wrapped_opt_array_ref(ip, il, std::forward(args)...) 
{} + + constexpr OptionalArrayRef(const std::initializer_list& Vec) + : wrapped_opt_array_ref(ArrayRef(Vec)) {} + + // Destructor + + ~OptionalArrayRef() = default; + + // Assignment + + constexpr OptionalArrayRef& operator=(std::nullopt_t) noexcept { + wrapped_opt_array_ref = std::nullopt; + return *this; + } + + OptionalArrayRef& operator=(const OptionalArrayRef& other) = default; + + OptionalArrayRef& operator=(OptionalArrayRef&& other) noexcept = default; + + constexpr OptionalArrayRef& operator=( + const std::optional>& other) noexcept { + wrapped_opt_array_ref = other; + return *this; + } + + constexpr OptionalArrayRef& operator=( + std::optional>&& other) noexcept { + wrapped_opt_array_ref = std::move(other); + return *this; + } + + template < + typename U = ArrayRef, + typename = std::enable_if_t< + !std::is_same_v, OptionalArrayRef> && + std::is_constructible_v, U&&> && + std::is_assignable_v&, U&&>>> + constexpr OptionalArrayRef& operator=(U&& value) noexcept( + std::is_nothrow_constructible_v, U&&> && + std::is_nothrow_assignable_v&, U&&>) { + wrapped_opt_array_ref = std::forward(value); + return *this; + } + + // Observers + + constexpr ArrayRef* operator->() noexcept { + return &wrapped_opt_array_ref.value(); + } + + constexpr const ArrayRef* operator->() const noexcept { + return &wrapped_opt_array_ref.value(); + } + + constexpr ArrayRef& operator*() & noexcept { + return wrapped_opt_array_ref.value(); + } + + constexpr const ArrayRef& operator*() const& noexcept { + return wrapped_opt_array_ref.value(); + } + + constexpr ArrayRef&& operator*() && noexcept { + return std::move(wrapped_opt_array_ref.value()); + } + + constexpr const ArrayRef&& operator*() const&& noexcept { + return std::move(wrapped_opt_array_ref.value()); + } + + constexpr explicit operator bool() const noexcept { + return wrapped_opt_array_ref.has_value(); + } + + constexpr bool has_value() const noexcept { + return wrapped_opt_array_ref.has_value(); + } + + constexpr ArrayRef& value() & { + return wrapped_opt_array_ref.value(); + } + + constexpr const ArrayRef& value() const& { + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) + return wrapped_opt_array_ref.value(); + } + + constexpr ArrayRef&& value() && { + return std::move(wrapped_opt_array_ref.value()); + } + + constexpr const ArrayRef&& value() const&& { + return std::move(wrapped_opt_array_ref.value()); + } + + template + constexpr std:: + enable_if_t>, ArrayRef> + value_or(U&& default_value) const& { + return wrapped_opt_array_ref.value_or(std::forward(default_value)); + } + + template + constexpr std:: + enable_if_t>, ArrayRef> + value_or(U&& default_value) && { + return wrapped_opt_array_ref.value_or(std::forward(default_value)); + } + + // Modifiers + + constexpr void swap(OptionalArrayRef& other) noexcept { + std::swap(wrapped_opt_array_ref, other.wrapped_opt_array_ref); + } + + constexpr void reset() noexcept { + wrapped_opt_array_ref.reset(); + } + + template + constexpr std:: + enable_if_t, Args&&...>, ArrayRef&> + emplace(Args&&... args) noexcept( + std::is_nothrow_constructible_v, Args&&...>) { + return wrapped_opt_array_ref.emplace(std::forward(args)...); + } + + template + constexpr ArrayRef& emplace( + std::initializer_list il, + Args&&... 
args) noexcept { + return wrapped_opt_array_ref.emplace(il, std::forward(args)...); + } + + private: + std::optional> wrapped_opt_array_ref; +}; + +using OptionalIntArrayRef = OptionalArrayRef; + +inline bool operator==( + const OptionalIntArrayRef& a1, + const IntArrayRef& other) { + if (!a1.has_value()) { + return false; + } + return a1.value() == other; +} + +inline bool operator==( + const c10::IntArrayRef& a1, + const c10::OptionalIntArrayRef& a2) { + return a2 == a1; +} + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/ParallelGuard.h b/phivenv/Lib/site-packages/torch/include/c10/util/ParallelGuard.h new file mode 100644 index 0000000000000000000000000000000000000000..cabeafcacbdfd6235040b74c00c2e89183b3bf8a --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/ParallelGuard.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +namespace c10 { + +// RAII thread local guard that tracks whether code is being executed in +// `at::parallel_for` or `at::parallel_reduce` loop function. +class C10_API ParallelGuard { + public: + static bool is_enabled(); + + ParallelGuard(bool state); + ~ParallelGuard(); + + private: + bool previous_state_; +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Registry.h b/phivenv/Lib/site-packages/torch/include/c10/util/Registry.h new file mode 100644 index 0000000000000000000000000000000000000000..d3a09c89a0b5bce1c13f46e1508a816ac55b60cd --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Registry.h @@ -0,0 +1,329 @@ +#ifndef C10_UTIL_REGISTRY_H_ +#define C10_UTIL_REGISTRY_H_ + +/** + * Simple registry implementation that uses static variables to + * register object creators during program initialization time. + */ + +// NB: This Registry works poorly when you have other namespaces. +// Make all macro invocations from inside the at namespace. + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace c10 { + +template +inline std::string KeyStrRepr(const KeyType& /*key*/) { + return "[key type printing not supported]"; +} + +template <> +inline std::string KeyStrRepr(const std::string& key) { + return key; +} + +enum RegistryPriority { + REGISTRY_FALLBACK = 1, + REGISTRY_DEFAULT = 2, + REGISTRY_PREFERRED = 3, +}; + +/** + * @brief A template class that allows one to register classes by keys. + * + * The keys are usually a std::string specifying the name, but can be anything + * that can be used in a std::map. + * + * You should most likely not use the Registry class explicitly, but use the + * helper macros below to declare specific registries as well as registering + * objects. + */ +template +class Registry { + public: + typedef std::function Creator; + + Registry(bool warning = true) : registry_(), priority_(), warning_(warning) {} + ~Registry() = default; + + void Register( + const SrcType& key, + Creator creator, + const RegistryPriority priority = REGISTRY_DEFAULT) { + std::lock_guard lock(register_mutex_); + // The if statement below is essentially the same as the following line: + // TORCH_CHECK_EQ(registry_.count(key), 0) << "Key " << key + // << " registered twice."; + // However, TORCH_CHECK_EQ depends on google logging, and since registration + // is carried out at static initialization time, we do not want to have an + // explicit dependency on glog's initialization function. 
+ if (registry_.count(key) != 0) { + auto cur_priority = priority_[key]; + if (priority > cur_priority) { +#ifdef DEBUG + std::string warn_msg = + "Overwriting already registered item for key " + KeyStrRepr(key); + fprintf(stderr, "%s\n", warn_msg.c_str()); +#endif + registry_[key] = creator; + priority_[key] = priority; + } else if (priority == cur_priority) { + std::string err_msg = + "Key already registered with the same priority: " + KeyStrRepr(key); + fprintf(stderr, "%s\n", err_msg.c_str()); + if (terminate_) { + std::exit(1); + } else { + throw std::runtime_error(err_msg); + } + } else if (warning_) { + std::string warn_msg = + "Higher priority item already registered, skipping registration of " + + KeyStrRepr(key); + fprintf(stderr, "%s\n", warn_msg.c_str()); + } + } else { + registry_[key] = creator; + priority_[key] = priority; + } + } + + void Register( + const SrcType& key, + Creator creator, + const std::string& help_msg, + const RegistryPriority priority = REGISTRY_DEFAULT) { + Register(key, creator, priority); + help_message_[key] = help_msg; + } + + inline bool Has(const SrcType& key) { + return (registry_.count(key) != 0); + } + + ObjectPtrType Create(const SrcType& key, Args... args) { + auto it = registry_.find(key); + if (it == registry_.end()) { + // Returns nullptr if the key is not registered. + return nullptr; + } + return it->second(args...); + } + + /** + * Returns the keys currently registered as a std::vector. + */ + std::vector Keys() const { + std::vector keys; + keys.reserve(registry_.size()); + for (const auto& it : registry_) { + keys.push_back(it.first); + } + return keys; + } + + inline const std::unordered_map& HelpMessage() const { + return help_message_; + } + + const char* HelpMessage(const SrcType& key) const { + auto it = help_message_.find(key); + if (it == help_message_.end()) { + return nullptr; + } + return it->second.c_str(); + } + + // Used for testing, if terminate is unset, Registry throws instead of + // calling std::exit + void SetTerminate(bool terminate) { + terminate_ = terminate; + } + + C10_DISABLE_COPY_AND_ASSIGN(Registry); + Registry(Registry&&) = delete; + Registry& operator=(Registry&&) = delete; + + private: + std::unordered_map registry_; + std::unordered_map priority_; + bool terminate_{true}; + const bool warning_; + std::unordered_map help_message_; + std::mutex register_mutex_; +}; + +template +class Registerer { + public: + explicit Registerer( + const SrcType& key, + Registry* registry, + typename Registry::Creator creator, + const std::string& help_msg = "") { + registry->Register(key, creator, help_msg); + } + + explicit Registerer( + const SrcType& key, + const RegistryPriority priority, + Registry* registry, + typename Registry::Creator creator, + const std::string& help_msg = "") { + registry->Register(key, creator, help_msg, priority); + } + + template + static ObjectPtrType DefaultCreator(Args... args) { + return ObjectPtrType(new DerivedType(args...)); + } +}; + +/** + * C10_DECLARE_TYPED_REGISTRY is a macro that expands to a function + * declaration, as well as creating a convenient typename for its corresponding + * registerer. + */ +// Note on C10_IMPORT and C10_EXPORT below: we need to explicitly mark DECLARE +// as import and DEFINE as export, because these registry macros will be used +// in downstream shared libraries as well, and one cannot use *_API - the API +// macro will be defined on a per-shared-library basis. 
Semantically, when one +// declares a typed registry it is always going to be IMPORT, and when one +// defines a registry (which should happen ONLY ONCE and ONLY IN SOURCE FILE), +// the instantiation unit is always going to be exported. +// +// The only unique condition is when in the same file one does DECLARE and +// DEFINE - in Windows compilers, this generates a warning that dllimport and +// dllexport are mixed, but the warning is fine and linker will be properly +// exporting the symbol. Same thing happens in the gflags flag declaration and +// definition caes. +#define C10_DECLARE_TYPED_REGISTRY( \ + RegistryName, SrcType, ObjectType, PtrType, ...) \ + C10_API ::c10::Registry, ##__VA_ARGS__>* \ + RegistryName(); \ + typedef ::c10::Registerer, ##__VA_ARGS__> \ + Registerer##RegistryName + +#define TORCH_DECLARE_TYPED_REGISTRY( \ + RegistryName, SrcType, ObjectType, PtrType, ...) \ + TORCH_API ::c10::Registry, ##__VA_ARGS__>* \ + RegistryName(); \ + typedef ::c10::Registerer, ##__VA_ARGS__> \ + Registerer##RegistryName + +#define C10_DEFINE_TYPED_REGISTRY( \ + RegistryName, SrcType, ObjectType, PtrType, ...) \ + C10_EXPORT ::c10::Registry, ##__VA_ARGS__>* \ + RegistryName() { \ + static ::c10::Registry, ##__VA_ARGS__>* \ + registry = new ::c10:: \ + Registry, ##__VA_ARGS__>(); \ + return registry; \ + } + +#define C10_DEFINE_TYPED_REGISTRY_WITHOUT_WARNING( \ + RegistryName, SrcType, ObjectType, PtrType, ...) \ + C10_EXPORT ::c10::Registry, ##__VA_ARGS__>* \ + RegistryName() { \ + static ::c10::Registry, ##__VA_ARGS__>* \ + registry = \ + new ::c10::Registry, ##__VA_ARGS__>( \ + false); \ + return registry; \ + } + +// Note(Yangqing): The __VA_ARGS__ below allows one to specify a templated +// creator with comma in its templated arguments. +#define C10_REGISTER_TYPED_CREATOR(RegistryName, key, ...) \ + static Registerer##RegistryName C10_ANONYMOUS_VARIABLE(g_##RegistryName)( \ + key, RegistryName(), ##__VA_ARGS__); + +#define C10_REGISTER_TYPED_CREATOR_WITH_PRIORITY( \ + RegistryName, key, priority, ...) \ + static Registerer##RegistryName C10_ANONYMOUS_VARIABLE(g_##RegistryName)( \ + key, priority, RegistryName(), ##__VA_ARGS__); + +#define C10_REGISTER_TYPED_CLASS(RegistryName, key, ...) \ + static Registerer##RegistryName C10_ANONYMOUS_VARIABLE(g_##RegistryName)( \ + key, \ + RegistryName(), \ + Registerer##RegistryName::DefaultCreator<__VA_ARGS__>, \ + ::c10::demangle_type<__VA_ARGS__>()); + +#define C10_REGISTER_TYPED_CLASS_WITH_PRIORITY( \ + RegistryName, key, priority, ...) \ + static Registerer##RegistryName C10_ANONYMOUS_VARIABLE(g_##RegistryName)( \ + key, \ + priority, \ + RegistryName(), \ + Registerer##RegistryName::DefaultCreator<__VA_ARGS__>, \ + ::c10::demangle_type<__VA_ARGS__>()); + +// C10_DECLARE_REGISTRY and C10_DEFINE_REGISTRY are hard-wired to use +// std::string as the key type, because that is the most commonly used cases. +#define C10_DECLARE_REGISTRY(RegistryName, ObjectType, ...) \ + C10_DECLARE_TYPED_REGISTRY( \ + RegistryName, std::string, ObjectType, std::unique_ptr, ##__VA_ARGS__) + +#define TORCH_DECLARE_REGISTRY(RegistryName, ObjectType, ...) \ + TORCH_DECLARE_TYPED_REGISTRY( \ + RegistryName, std::string, ObjectType, std::unique_ptr, ##__VA_ARGS__) + +#define C10_DEFINE_REGISTRY(RegistryName, ObjectType, ...) \ + C10_DEFINE_TYPED_REGISTRY( \ + RegistryName, std::string, ObjectType, std::unique_ptr, ##__VA_ARGS__) + +#define C10_DEFINE_REGISTRY_WITHOUT_WARNING(RegistryName, ObjectType, ...) 
\ + C10_DEFINE_TYPED_REGISTRY_WITHOUT_WARNING( \ + RegistryName, std::string, ObjectType, std::unique_ptr, ##__VA_ARGS__) + +#define C10_DECLARE_SHARED_REGISTRY(RegistryName, ObjectType, ...) \ + C10_DECLARE_TYPED_REGISTRY( \ + RegistryName, std::string, ObjectType, std::shared_ptr, ##__VA_ARGS__) + +#define TORCH_DECLARE_SHARED_REGISTRY(RegistryName, ObjectType, ...) \ + TORCH_DECLARE_TYPED_REGISTRY( \ + RegistryName, std::string, ObjectType, std::shared_ptr, ##__VA_ARGS__) + +#define C10_DEFINE_SHARED_REGISTRY(RegistryName, ObjectType, ...) \ + C10_DEFINE_TYPED_REGISTRY( \ + RegistryName, std::string, ObjectType, std::shared_ptr, ##__VA_ARGS__) + +#define C10_DEFINE_SHARED_REGISTRY_WITHOUT_WARNING( \ + RegistryName, ObjectType, ...) \ + C10_DEFINE_TYPED_REGISTRY_WITHOUT_WARNING( \ + RegistryName, std::string, ObjectType, std::shared_ptr, ##__VA_ARGS__) + +// C10_REGISTER_CREATOR and C10_REGISTER_CLASS are hard-wired to use std::string +// as the key +// type, because that is the most commonly used cases. +#define C10_REGISTER_CREATOR(RegistryName, key, ...) \ + C10_REGISTER_TYPED_CREATOR(RegistryName, #key, __VA_ARGS__) + +#define C10_REGISTER_CREATOR_WITH_PRIORITY(RegistryName, key, priority, ...) \ + C10_REGISTER_TYPED_CREATOR_WITH_PRIORITY( \ + RegistryName, #key, priority, __VA_ARGS__) + +#define C10_REGISTER_CLASS(RegistryName, key, ...) \ + C10_REGISTER_TYPED_CLASS(RegistryName, #key, __VA_ARGS__) + +#define C10_REGISTER_CLASS_WITH_PRIORITY(RegistryName, key, priority, ...) \ + C10_REGISTER_TYPED_CLASS_WITH_PRIORITY( \ + RegistryName, #key, priority, __VA_ARGS__) + +} // namespace c10 + +#endif // C10_UTIL_REGISTRY_H_ diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/ScopeExit.h b/phivenv/Lib/site-packages/torch/include/c10/util/ScopeExit.h new file mode 100644 index 0000000000000000000000000000000000000000..8ed2373eea03bb8615d6742f05996e9aa27158d6 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/ScopeExit.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include + +namespace c10 { + +/** + * Mostly copied from https://llvm.org/doxygen/ScopeExit_8h_source.html + */ +template +class scope_exit { + Callable ExitFunction; + bool Engaged = true; // False once moved-from or release()d. + + public: + template + // NOLINTNEXTLINE(bugprone-forwarding-reference-overload) + explicit scope_exit(Fp&& F) : ExitFunction(std::forward(F)) {} + + scope_exit(scope_exit&& Rhs) noexcept + : ExitFunction(std::move(Rhs.ExitFunction)), Engaged(Rhs.Engaged) { + Rhs.release(); + } + scope_exit(const scope_exit&) = delete; + scope_exit& operator=(scope_exit&&) = delete; + scope_exit& operator=(const scope_exit&) = delete; + + void release() { + Engaged = false; + } + + ~scope_exit() { + if (Engaged) { + ExitFunction(); + } + } +}; + +// Keeps the callable object that is passed in, and execute it at the +// destruction of the returned object (usually at the scope exit where the +// returned object is kept). +// +// Interface is specified by p0052r2. +template +scope_exit> make_scope_exit(Callable&& F) { + return scope_exit>(std::forward(F)); +} + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Semaphore.h b/phivenv/Lib/site-packages/torch/include/c10/util/Semaphore.h new file mode 100644 index 0000000000000000000000000000000000000000..ca44c5d037de2e03b472e5a731870ac32554f284 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Semaphore.h @@ -0,0 +1,71 @@ +#pragma once + +#include + +/* + a simple semaphore interface. 
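+
+ A hedged usage sketch (c10::Semaphore is defined below):
+
+   c10::Semaphore sem(0);
+   sem.release();               // producer: make one permit available
+   sem.acquire();               // consumer: block until a permit is available
+   bool got = sem.tryAcquire(); // non-blocking attempt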
+*/ + +// note: __cpp_lib_semaphore will not be defined in some apple platforms +// even if >= C++20. +#if __has_include() && defined(__cpp_lib_semaphore) && __cpp_lib_semaphore >= 201907L +#define C10_SEMAPHORE_USE_STL +#endif + +#ifdef C10_SEMAPHORE_USE_STL +#include +#else +// To use moodycamel semaphore, we need to include the header file +// for concurrentqueue first. Hiding implementation detail here. +#ifdef BLOCK_SIZE +#pragma push_macro("BLOCK_SIZE") +#undef BLOCK_SIZE +#include // @manual +#pragma pop_macro("BLOCK_SIZE") +#else +#include // @manual +#endif + +#include // @manual +#endif + +namespace c10 { + +class Semaphore { + public: + Semaphore(int32_t initial_count = 0) : impl_(initial_count) {} + + void release(int32_t n = 1) { +#ifdef C10_SEMAPHORE_USE_STL + impl_.release(n); +#else + impl_.signal(n); +#endif + } + + void acquire() { +#ifdef C10_SEMAPHORE_USE_STL + impl_.acquire(); +#else + impl_.wait(); +#endif + } + + bool tryAcquire() { +#ifdef C10_SEMAPHORE_USE_STL + return impl_.try_acquire(); +#else + return impl_.tryWait(); +#endif + } + + private: +#ifdef C10_SEMAPHORE_USE_STL + std::counting_semaphore<> impl_; +#else + moodycamel::LightweightSemaphore impl_; +#endif +}; +} // namespace c10 + +#undef C10_SEMAPHORE_USE_STL diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/SmallBuffer.h b/phivenv/Lib/site-packages/torch/include/c10/util/SmallBuffer.h new file mode 100644 index 0000000000000000000000000000000000000000..1e4317ee03d0913d1a4128c7330631d01b889510 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/SmallBuffer.h @@ -0,0 +1,87 @@ +#pragma once +#include +#include +#include +#include + +/** Helper class for allocating temporary fixed size arrays with SBO. + * + * This is intentionally much simpler than SmallVector, to improve performance + * at the expense of many features: + * - No zero-initialization for numeric types + * - No resizing after construction + * - No copy/move + * - No non-trivial types + */ + +namespace c10 { + +template +class SmallBuffer { + static_assert(std::is_trivial_v, "SmallBuffer is intended for POD types"); + + std::array storage_; + size_t size_{}; + T* data_{}; + + public: + SmallBuffer(size_t size) : size_(size) { + if (size > N) { + data_ = new T[size]; + } else { + data_ = &storage_[0]; + } + } + + SmallBuffer(const SmallBuffer&) = delete; + SmallBuffer& operator=(const SmallBuffer&) = delete; + + // move constructor is needed in function return + SmallBuffer(SmallBuffer&& rhs) noexcept : size_{rhs.size_} { + rhs.size_ = 0; + if (size_ > N) { + data_ = rhs.data_; + rhs.data_ = nullptr; + } else { + storage_ = std::move(rhs.storage_); + data_ = &storage_[0]; + } + } + + SmallBuffer& operator=(SmallBuffer&&) = delete; + + ~SmallBuffer() { + if (size_ > N) { + delete[] data_; + } + } + T& operator[](size_t idx) { + return data()[idx]; + } + const T& operator[](size_t idx) const { + return data()[idx]; + } + T* data() { + return data_; + } + const T* data() const { + return data_; + } + size_t size() const { + return size_; + } + T* begin() { + return data_; + } + const T* begin() const { + return data_; + } + T* end() { + return data_ + size_; + } + const T* end() const { + return data_ + size_; + } +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/SmallVector.h b/phivenv/Lib/site-packages/torch/include/c10/util/SmallVector.h new file mode 100644 index 0000000000000000000000000000000000000000..60cd3e6cb9ae761caf7030efde65ee278b2f3d15 --- /dev/null +++ 
b/phivenv/Lib/site-packages/torch/include/c10/util/SmallVector.h @@ -0,0 +1,1467 @@ +//===- llvm/ADT/SmallVector.h - 'Normally small' vectors --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the SmallVector class. +// +//===----------------------------------------------------------------------===// + +// ATen: modified from llvm::SmallVector. +// used std::is_trivially_{copy,move}_constructible +// replaced iterator_range constructor with inline Container&& constructor +// replaced LLVM_NODISCARD, LLVM_LIKELY, and LLVM_UNLIKELY with c10 equivalents +// removed LLVM_GSL_OWNER +// added SmallVector::at +// added operator<< for std::ostream +// added C10_API to export SmallVectorBase + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace c10 { + +/// This is all the stuff common to all SmallVectors. +/// +/// The template parameter specifies the type which should be used to hold the +/// Size and Capacity of the SmallVector, so it can be adjusted. +/// Using 32 bit size is desirable to shrink the size of the SmallVector. +/// Using 64 bit size is desirable for cases like SmallVector, where a +/// 32 bit size would limit the vector to ~4GB. SmallVectors are used for +/// buffering bitcode output - which can exceed 4GB. +template +class C10_API SmallVectorBase { + protected: + void* BeginX; + Size_T Size = 0, Capacity; + + /// The maximum value of the Size_T used. + static constexpr size_t SizeTypeMax() { + return std::numeric_limits::max(); + } + + SmallVectorBase(void* FirstEl, size_t TotalCapacity) + : BeginX(FirstEl), Capacity(TotalCapacity) {} + + /// This is a helper for \a grow() that's out of line to reduce code + /// duplication. This function will report a fatal error if it can't grow at + /// least to \p MinSize. + void* mallocForGrow(size_t MinSize, size_t TSize, size_t& NewCapacity); + + /// This is an implementation of the grow() method which only works + /// on POD-like data types and is out of line to reduce code duplication. + /// This function will report a fatal error if it cannot increase capacity. + void grow_pod(const void* FirstEl, size_t MinSize, size_t TSize); + + public: + SmallVectorBase() = delete; + size_t size() const { + return Size; + } + size_t capacity() const { + return Capacity; + } + + [[nodiscard]] bool empty() const { + return !Size; + } + + /// Set the array size to \p N, which the current array must have enough + /// capacity for. + /// + /// This does not construct or destroy any elements in the vector. + /// + /// Clients can use this in conjunction with capacity() to write past the end + /// of the buffer when they know that more elements are available, and only + /// update the size later. This avoids the cost of value initializing elements + /// which will only be overwritten. + void set_size(size_t N) { + assert(N <= capacity()); + Size = N; + } +}; + +template +using SmallVectorSizeType = + std::conditional_t= 8, uint64_t, uint32_t>; + +/// Figure out the offset of the first element. 
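+/// (The struct below mirrors SmallVector's layout - the size/capacity base
+/// class followed by the inline element storage - so that offsetof can locate
+/// the first inline element; see getFirstEl() in SmallVectorTemplateCommon
+/// further down.)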
+template +struct SmallVectorAlignmentAndSize { + // NOLINTNEXTLINE(*c-arrays*) + alignas(SmallVectorBase>) char Base[sizeof( + SmallVectorBase>)]; + // NOLINTNEXTLINE(*c-arrays*) + alignas(T) char FirstEl[sizeof(T)]; +}; + +/// This is the part of SmallVectorTemplateBase which does not depend on whether +/// the type T is a POD. The extra dummy template argument is used by ArrayRef +/// to avoid unnecessarily requiring T to be complete. +template +class SmallVectorTemplateCommon + : public SmallVectorBase> { + using Base = SmallVectorBase>; + + /// Find the address of the first element. For this pointer math to be valid + /// with small-size of 0 for T with lots of alignment, it's important that + /// SmallVectorStorage is properly-aligned even for small-size of 0. + void* getFirstEl() const { + return const_cast(reinterpret_cast( + reinterpret_cast(this) + + offsetof(SmallVectorAlignmentAndSize, FirstEl))); + } + // Space after 'FirstEl' is clobbered, do not add any instance vars after it. + + protected: + SmallVectorTemplateCommon(size_t Size) : Base(getFirstEl(), Size) {} + + void grow_pod(size_t MinSize, size_t TSize) { + Base::grow_pod(getFirstEl(), MinSize, TSize); + } + + /// Return true if this is a smallvector which has not had dynamic + /// memory allocated for it. + bool isSmall() const { + return this->BeginX == getFirstEl(); + } + + /// Put this vector in a state of being small. + void resetToSmall() { + this->BeginX = getFirstEl(); + this->Size = this->Capacity = 0; // FIXME: Setting Capacity to 0 is suspect. + } + + /// Return true if V is an internal reference to the given range. + bool isReferenceToRange(const void* V, const void* First, const void* Last) + const { + // Use std::less to avoid UB. + std::less<> LessThan; + return !LessThan(V, First) && LessThan(V, Last); + } + + /// Return true if V is an internal reference to this vector. + bool isReferenceToStorage(const void* V) const { + return isReferenceToRange(V, this->begin(), this->end()); + } + + /// Return true if First and Last form a valid (possibly empty) range in this + /// vector's storage. + bool isRangeInStorage(const void* First, const void* Last) const { + // Use std::less to avoid UB. + std::less<> LessThan; + return !LessThan(First, this->begin()) && !LessThan(Last, First) && + !LessThan(this->end(), Last); + } + + /// Return true unless Elt will be invalidated by resizing the vector to + /// NewSize. + bool isSafeToReferenceAfterResize(const void* Elt, size_t NewSize) { + // Past the end. + if (C10_LIKELY(!isReferenceToStorage(Elt))) + return true; + + // Return false if Elt will be destroyed by shrinking. + if (NewSize <= this->size()) + return Elt < this->begin() + NewSize; + + // Return false if we need to grow. + return NewSize <= this->capacity(); + } + + /// Check whether Elt will be invalidated by resizing the vector to NewSize. + void assertSafeToReferenceAfterResize(const void* Elt, size_t NewSize) { + (void)Elt; // Suppress unused variable warning + (void)NewSize; // Suppress unused variable warning + assert( + isSafeToReferenceAfterResize(Elt, NewSize) && + "Attempting to reference an element of the vector in an operation " + "that invalidates it"); + } + + /// Check whether Elt will be invalidated by increasing the size of the + /// vector by N. + void assertSafeToAdd(const void* Elt, size_t N = 1) { + this->assertSafeToReferenceAfterResize(Elt, this->size() + N); + } + + /// Check whether any part of the range will be invalidated by clearing. 
+ void assertSafeToReferenceAfterClear(const T* From, const T* To) { + if (From == To) + return; + this->assertSafeToReferenceAfterResize(From, 0); + this->assertSafeToReferenceAfterResize(To - 1, 0); + } + template < + class ItTy, + std::enable_if_t, T*>, bool> = + false> + void assertSafeToReferenceAfterClear(ItTy, ItTy) {} + + /// Check whether any part of the range will be invalidated by growing. + void assertSafeToAddRange(const T* From, const T* To) { + if (From == To) + return; + this->assertSafeToAdd(From, To - From); + this->assertSafeToAdd(To - 1, To - From); + } + template < + class ItTy, + std::enable_if_t, T*>, bool> = + false> + void assertSafeToAddRange(ItTy, ItTy) {} + + /// Reserve enough space to add one element, and return the updated element + /// pointer in case it was a reference to the storage. + template + static const T* reserveForParamAndGetAddressImpl( + U* This, + const T& Elt, + size_t N) { + size_t NewSize = This->size() + N; + if (C10_LIKELY(NewSize <= This->capacity())) + return &Elt; + + bool ReferencesStorage = false; + int64_t Index = -1; + if constexpr (!U::TakesParamByValue) { + if (C10_UNLIKELY(This->isReferenceToStorage(&Elt))) { + ReferencesStorage = true; + Index = &Elt - This->begin(); + } + } + This->grow(NewSize); + return ReferencesStorage ? This->begin() + Index : &Elt; + } + + public: + using size_type = size_t; + using difference_type = ptrdiff_t; + using value_type = T; + using iterator = T*; + using const_iterator = const T*; + + using const_reverse_iterator = std::reverse_iterator; + using reverse_iterator = std::reverse_iterator; + + using reference = T&; + using const_reference = const T&; + using pointer = T*; + using const_pointer = const T*; + + using Base::capacity; + using Base::empty; + using Base::size; + + // forward iterator creation methods. + iterator begin() { + return (iterator)this->BeginX; + } + const_iterator begin() const { + return (const_iterator)this->BeginX; + } + iterator end() { + return begin() + size(); + } + const_iterator end() const { + return begin() + size(); + } + + // reverse iterator creation methods. + reverse_iterator rbegin() { + return reverse_iterator(end()); + } + const_reverse_iterator rbegin() const { + return const_reverse_iterator(end()); + } + reverse_iterator rend() { + return reverse_iterator(begin()); + } + const_reverse_iterator rend() const { + return const_reverse_iterator(begin()); + } + + size_type size_in_bytes() const { + return size() * sizeof(T); + } + constexpr size_type max_size() const { + return std::min(this->SizeTypeMax(), size_type(-1) / sizeof(T)); + } + + size_t capacity_in_bytes() const { + return capacity() * sizeof(T); + } + + /// Return a pointer to the vector's buffer, even if empty(). + pointer data() { + return pointer(begin()); + } + /// Return a pointer to the vector's buffer, even if empty(). + const_pointer data() const { + return const_pointer(begin()); + } + + // SmallVector::at is NOT from LLVM. 
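+ // A hedged sketch of the accessors below (assumes the SmallVector<T, N>
+ // class defined later in this header):
+ //
+ //   c10::SmallVector<int, 4> v;
+ //   v.push_back(10); v.push_back(20);
+ //   int a = v[1];     // asserts in debug builds, unchecked otherwise
+ //   int b = v.at(1);  // same contract: asserts on out-of-range, never throws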
+ reference at(size_type idx) { + assert(idx < size()); + return begin()[idx]; + } + const_reference at(size_type idx) const { + assert(idx < size()); + return begin()[idx]; + } + reference operator[](size_type idx) { + assert(idx < size()); + return begin()[idx]; + } + const_reference operator[](size_type idx) const { + assert(idx < size()); + return begin()[idx]; + } + + reference front() { + assert(!empty()); + return begin()[0]; + } + const_reference front() const { + assert(!empty()); + return begin()[0]; + } + + reference back() { + assert(!empty()); + return end()[-1]; + } + const_reference back() const { + assert(!empty()); + return end()[-1]; + } +}; + +/// SmallVectorTemplateBase - This is where we put +/// method implementations that are designed to work with non-trivial T's. +/// +/// We approximate is_trivially_copyable with trivial move/copy construction and +/// trivial destruction. While the standard doesn't specify that you're allowed +/// copy these types with memcpy, there is no way for the type to observe this. +/// This catches the important case of std::pair, which is not +/// trivially assignable. +/// +/// XXX: if build fails here fall back to C10_IS_TRIVIALLY_COPYABLE and make a +/// note +template < + typename T, + bool = (std::is_trivially_copy_constructible_v) && + (std::is_trivially_move_constructible_v) && + std::is_trivially_destructible_v> +class SmallVectorTemplateBase : public SmallVectorTemplateCommon { + friend class SmallVectorTemplateCommon; + + protected: + static constexpr bool TakesParamByValue = false; + using ValueParamT = const T&; + + SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon(Size) {} + + static void destroy_range(T* S, T* E) { + while (S != E) { + --E; + E->~T(); + } + } + + /// Move the range [I, E) into the uninitialized memory starting with "Dest", + /// constructing elements as needed. + template + static void uninitialized_move(It1 I, It1 E, It2 Dest) { + std::uninitialized_copy( + std::make_move_iterator(I), std::make_move_iterator(E), Dest); + } + + /// Copy the range [I, E) onto the uninitialized memory starting with "Dest", + /// constructing elements as needed. + template + static void uninitialized_copy(It1 I, It1 E, It2 Dest) { + std::uninitialized_copy(I, E, Dest); + } + + /// Grow the allocated memory (without initializing new elements), doubling + /// the size of the allocated memory. Guarantees space for at least one more + /// element, or MinSize more elements if specified. + void grow(size_t MinSize = 0); + + /// Create a new allocation big enough for \p MinSize and pass back its size + /// in \p NewCapacity. This is the first section of \a grow(). + T* mallocForGrow(size_t MinSize, size_t& NewCapacity) { + return static_cast( + SmallVectorBase>::mallocForGrow( + MinSize, sizeof(T), NewCapacity)); + } + + /// Move existing elements over to the new allocation \p NewElts, the middle + /// section of \a grow(). + void moveElementsForGrow(T* NewElts); + + /// Transfer ownership of the allocation, finishing up \a grow(). + void takeAllocationForGrow(T* NewElts, size_t NewCapacity); + + /// Reserve enough space to add one element, and return the updated element + /// pointer in case it was a reference to the storage. + const T* reserveForParamAndGetAddress(const T& Elt, size_t N = 1) { + return this->reserveForParamAndGetAddressImpl(this, Elt, N); + } + + /// Reserve enough space to add one element, and return the updated element + /// pointer in case it was a reference to the storage. 
+ T* reserveForParamAndGetAddress(T& Elt, size_t N = 1) { + return const_cast(this->reserveForParamAndGetAddressImpl(this, Elt, N)); + } + + static T&& forward_value_param(T&& V) { + return std::move(V); + } + static const T& forward_value_param(const T& V) { + return V; + } + + void growAndAssign(size_t NumElts, const T& Elt) { + // Grow manually in case Elt is an internal reference. + size_t NewCapacity = 0; + T* NewElts = mallocForGrow(NumElts, NewCapacity); + std::uninitialized_fill_n(NewElts, NumElts, Elt); + this->destroy_range(this->begin(), this->end()); + takeAllocationForGrow(NewElts, NewCapacity); + this->set_size(NumElts); + } + + template + T& growAndEmplaceBack(ArgTypes&&... Args) { + // Grow manually in case one of Args is an internal reference. + size_t NewCapacity = 0; + T* NewElts = mallocForGrow(0, NewCapacity); + ::new ((void*)(NewElts + this->size())) T(std::forward(Args)...); + moveElementsForGrow(NewElts); + takeAllocationForGrow(NewElts, NewCapacity); + this->set_size(this->size() + 1); + return this->back(); + } + + public: + void push_back(const T& Elt) { + const T* EltPtr = reserveForParamAndGetAddress(Elt); + ::new ((void*)this->end()) T(*EltPtr); + this->set_size(this->size() + 1); + } + + // NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved) + void push_back(T&& Elt) { + T* EltPtr = reserveForParamAndGetAddress(Elt); + ::new ((void*)this->end()) T(::std::move(*EltPtr)); + this->set_size(this->size() + 1); + } + + void pop_back() { + this->set_size(this->size() - 1); + this->end()->~T(); + } +}; + +// Define this out-of-line to dissuade the C++ compiler from inlining it. +template +void SmallVectorTemplateBase::grow(size_t MinSize) { + size_t NewCapacity = 0; + T* NewElts = mallocForGrow(MinSize, NewCapacity); + moveElementsForGrow(NewElts); + takeAllocationForGrow(NewElts, NewCapacity); +} + +// Define this out-of-line to dissuade the C++ compiler from inlining it. +template +void SmallVectorTemplateBase::moveElementsForGrow( + T* NewElts) { + // Move the elements over. + this->uninitialized_move(this->begin(), this->end(), NewElts); + + // Destroy the original elements. + destroy_range(this->begin(), this->end()); +} + +// Define this out-of-line to dissuade the C++ compiler from inlining it. +template +void SmallVectorTemplateBase::takeAllocationForGrow( + T* NewElts, + size_t NewCapacity) { + // If this wasn't grown from the inline copy, deallocate the old space. + if (!this->isSmall()) + free(this->begin()); + + this->BeginX = NewElts; + this->Capacity = NewCapacity; +} + +/// SmallVectorTemplateBase - This is where we put +/// method implementations that are designed to work with trivially copyable +/// T's. This allows using memcpy in place of copy/move construction and +/// skipping destruction. +template +class SmallVectorTemplateBase : public SmallVectorTemplateCommon { + friend class SmallVectorTemplateCommon; + + protected: + /// True if it's cheap enough to take parameters by value. Doing so avoids + /// overhead related to mitigations for reference invalidation. + static constexpr bool TakesParamByValue = sizeof(T) <= 2 * sizeof(void*); + + /// Either const T& or T, depending on whether it's cheap enough to take + /// parameters by value. + using ValueParamT = std::conditional_t; + + SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon(Size) {} + + // No need to do a destroy loop for POD's. 
+ static void destroy_range(T*, T*) {} + + /// Move the range [I, E) onto the uninitialized memory + /// starting with "Dest", constructing elements into it as needed. + template + static void uninitialized_move(It1 I, It1 E, It2 Dest) { + // Just do a copy. + uninitialized_copy(I, E, Dest); + } + + /// Copy the range [I, E) onto the uninitialized memory + /// starting with "Dest", constructing elements into it as needed. + template + static void uninitialized_copy(It1 I, It1 E, It2 Dest) { + // Arbitrary iterator types; just use the basic implementation. + std::uninitialized_copy(I, E, Dest); + } + + /// Copy the range [I, E) onto the uninitialized memory + /// starting with "Dest", constructing elements into it as needed. + template + static void uninitialized_copy( + T1* I, + T1* E, + T2* Dest, + std::enable_if_t, T2>>* = + nullptr) { + // Use memcpy for PODs iterated by pointers (which includes SmallVector + // iterators): std::uninitialized_copy optimizes to memmove, but we can + // use memcpy here. Note that I and E are iterators and thus might be + // invalid for memcpy if they are equal. + if (I != E) + memcpy(reinterpret_cast(Dest), I, (E - I) * sizeof(T)); + } + + /// Double the size of the allocated memory, guaranteeing space for at + /// least one more element or MinSize if specified. + void grow(size_t MinSize = 0) { + this->grow_pod(MinSize, sizeof(T)); + } + + /// Reserve enough space to add one element, and return the updated element + /// pointer in case it was a reference to the storage. + const T* reserveForParamAndGetAddress(const T& Elt, size_t N = 1) { + return this->reserveForParamAndGetAddressImpl(this, Elt, N); + } + + /// Reserve enough space to add one element, and return the updated element + /// pointer in case it was a reference to the storage. + T* reserveForParamAndGetAddress(T& Elt, size_t N = 1) { + return const_cast(this->reserveForParamAndGetAddressImpl(this, Elt, N)); + } + + /// Copy \p V or return a reference, depending on \a ValueParamT. + static ValueParamT forward_value_param(ValueParamT V) { + return V; + } + + void growAndAssign(size_t NumElts, T Elt) { + // Elt has been copied in case it's an internal reference, side-stepping + // reference invalidation problems without losing the realloc optimization. + this->set_size(0); + this->grow(NumElts); + std::uninitialized_fill_n(this->begin(), NumElts, Elt); + this->set_size(NumElts); + } + + template + T& growAndEmplaceBack(ArgTypes&&... Args) { + // Use push_back with a copy in case Args has an internal reference, + // side-stepping reference invalidation problems without losing the realloc + // optimization. + push_back(T(std::forward(Args)...)); + return this->back(); + } + + public: + void push_back(ValueParamT Elt) { + const T* EltPtr = reserveForParamAndGetAddress(Elt); + memcpy(reinterpret_cast(this->end()), EltPtr, sizeof(T)); + this->set_size(this->size() + 1); + } + + void pop_back() { + this->set_size(this->size() - 1); + } +}; + +/// This class consists of common code factored out of the SmallVector class to +/// reduce code duplication based on the SmallVector 'N' template parameter. 
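+///
+/// A common usage sketch (the usual LLVM idiom; SmallVector<T, N> itself is
+/// defined further down in this header): take SmallVectorImpl<T>& in function
+/// signatures so callers can pass a SmallVector with any inline capacity N:
+///
+///   void collect(c10::SmallVectorImpl<int>& out) { out.push_back(42); }
+///
+///   c10::SmallVector<int, 8> v;
+///   collect(v);  // also accepts SmallVector<int, 2>, etc.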
+template +class SmallVectorImpl : public SmallVectorTemplateBase { + using SuperClass = SmallVectorTemplateBase; + + public: + using iterator = typename SuperClass::iterator; + using const_iterator = typename SuperClass::const_iterator; + using reference = typename SuperClass::reference; + using size_type = typename SuperClass::size_type; + + protected: + using SmallVectorTemplateBase::TakesParamByValue; + using ValueParamT = typename SuperClass::ValueParamT; + + // Default ctor - Initialize to empty. + explicit SmallVectorImpl(unsigned N) : SmallVectorTemplateBase(N) {} + + public: + SmallVectorImpl(const SmallVectorImpl&) = delete; + + ~SmallVectorImpl() { + // Subclass has already destructed this vector's elements. + // If this wasn't grown from the inline copy, deallocate the old space. + if (!this->isSmall()) + free(this->begin()); + } + + void clear() { + this->destroy_range(this->begin(), this->end()); + this->Size = 0; + } + + private: + template + void resizeImpl(size_type N) { + if (N < this->size()) { + this->pop_back_n(this->size() - N); + } else if (N > this->size()) { + this->reserve(N); + for (auto I = this->end(), E = this->begin() + N; I != E; ++I) + if (ForOverwrite) + new (&*I) T; + else + new (&*I) T(); + this->set_size(N); + } + } + + public: + void resize(size_type N) { + resizeImpl(N); + } + + /// Like resize, but \ref T is POD, the new values won't be initialized. + void resize_for_overwrite(size_type N) { + resizeImpl(N); + } + + void resize(size_type N, ValueParamT NV) { + if (N == this->size()) + return; + + if (N < this->size()) { + this->pop_back_n(this->size() - N); + return; + } + + // N > this->size(). Defer to append. + this->append(N - this->size(), NV); + } + + void reserve(size_type N) { + if (this->capacity() < N) + this->grow(N); + } + + void pop_back_n(size_type NumItems) { + assert(this->size() >= NumItems); + this->destroy_range(this->end() - NumItems, this->end()); + this->set_size(this->size() - NumItems); + } + + [[nodiscard]] T pop_back_val() { + T Result = ::std::move(this->back()); + this->pop_back(); + return Result; + } + + void swap(SmallVectorImpl& RHS) noexcept; + + /// Add the specified range to the end of the SmallVector. + template < + typename in_iter, + typename = std::enable_if_t::iterator_category, + std::input_iterator_tag>>> + void append(in_iter in_start, in_iter in_end) { + this->assertSafeToAddRange(in_start, in_end); + size_type NumInputs = std::distance(in_start, in_end); + this->reserve(this->size() + NumInputs); + this->uninitialized_copy(in_start, in_end, this->end()); + this->set_size(this->size() + NumInputs); + } + + /// Append \p NumInputs copies of \p Elt to the end. + void append(size_type NumInputs, ValueParamT Elt) { + const T* EltPtr = this->reserveForParamAndGetAddress(Elt, NumInputs); + std::uninitialized_fill_n(this->end(), NumInputs, *EltPtr); + this->set_size(this->size() + NumInputs); + } + + void append(std::initializer_list IL) { + append(IL.begin(), IL.end()); + } + + void append(const SmallVectorImpl& RHS) { + append(RHS.begin(), RHS.end()); + } + + void assign(size_type NumElts, ValueParamT Elt) { + // Note that Elt could be an internal reference. + if (NumElts > this->capacity()) { + this->growAndAssign(NumElts, Elt); + return; + } + + // Assign over existing elements. 
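+ // (std::fill_n below overwrites the already-constructed prefix; if NumElts is
+ // larger, copies of Elt are constructed in the uninitialized tail, and if it
+ // is smaller, the surplus old elements are destroyed before set_size.)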
+ std::fill_n(this->begin(), std::min(NumElts, this->size()), Elt); + if (NumElts > this->size()) + std::uninitialized_fill_n(this->end(), NumElts - this->size(), Elt); + else if (NumElts < this->size()) + this->destroy_range(this->begin() + NumElts, this->end()); + this->set_size(NumElts); + } + + // FIXME: Consider assigning over existing elements, rather than clearing & + // re-initializing them - for all assign(...) variants. + + template < + typename in_iter, + typename = std::enable_if_t::iterator_category, + std::input_iterator_tag>>> + void assign(in_iter in_start, in_iter in_end) { + this->assertSafeToReferenceAfterClear(in_start, in_end); + clear(); + append(in_start, in_end); + } + + void assign(std::initializer_list IL) { + clear(); + append(IL); + } + + void assign(const SmallVectorImpl& RHS) { + assign(RHS.begin(), RHS.end()); + } + + iterator erase(iterator I) { + assert( + this->isReferenceToStorage(I) && "Iterator to erase is out of bounds."); + + iterator N = I; + // Shift all elts down one. + std::move(I + 1, this->end(), I); + // Drop the last elt. + this->pop_back(); + return (N); + } + + iterator erase(iterator S, iterator E) { + assert(this->isRangeInStorage(S, E) && "Range to erase is out of bounds."); + + iterator N = S; + // Shift all elts down. + iterator I = std::move(E, this->end(), S); + // Drop the last elts. + this->destroy_range(I, this->end()); + this->set_size(I - this->begin()); + return (N); + } + + private: + template + iterator insert_one_impl(iterator I, ArgType&& Elt) { + // Callers ensure that ArgType is derived from T. + static_assert( + std::is_same>, T>:: + value, + "ArgType must be derived from T!"); + + if (I == this->end()) { // Important special case for empty vector. + this->push_back(::std::forward(Elt)); + return this->end() - 1; + } + + assert( + this->isReferenceToStorage(I) && + "Insertion iterator is out of bounds."); + + // Grow if necessary. + size_t Index = I - this->begin(); + std::remove_reference_t* EltPtr = + this->reserveForParamAndGetAddress(Elt); + I = this->begin() + Index; + + ::new ((void*)this->end()) T(::std::move(this->back())); + // Push everything else over. + std::move_backward(I, this->end() - 1, this->end()); + this->set_size(this->size() + 1); + + // If we just moved the element we're inserting, be sure to update + // the reference (never happens if TakesParamByValue). + static_assert( + !TakesParamByValue || std::is_same_v, + "ArgType must be 'T' when taking by value!"); + if (!TakesParamByValue && this->isReferenceToRange(EltPtr, I, this->end())) + ++EltPtr; + + *I = ::std::forward(*EltPtr); + return I; + } + + public: + iterator insert(iterator I, T&& Elt) { + return insert_one_impl(I, this->forward_value_param(std::move(Elt))); + } + + iterator insert(iterator I, const T& Elt) { + return insert_one_impl(I, this->forward_value_param(Elt)); + } + + iterator insert(iterator I, size_type NumToInsert, ValueParamT Elt) { + // Convert iterator to elt# to avoid invalidating iterator when we reserve() + size_t InsertElt = I - this->begin(); + + if (I == this->end()) { // Important special case for empty vector. + append(NumToInsert, Elt); + return this->begin() + InsertElt; + } + + assert( + this->isReferenceToStorage(I) && + "Insertion iterator is out of bounds."); + + // Ensure there is enough space, and get the (maybe updated) address of + // Elt. + const T* EltPtr = this->reserveForParamAndGetAddress(Elt, NumToInsert); + + // Uninvalidate the iterator. 
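+ // (reserve() above may have reallocated the buffer, so I is rebuilt from the
+ // element index saved before the reserve rather than reused directly.)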
+ I = this->begin() + InsertElt; + + // If there are more elements between the insertion point and the end of the + // range than there are being inserted, we can use a simple approach to + // insertion. Since we already reserved space, we know that this won't + // reallocate the vector. + if (size_t(this->end() - I) >= NumToInsert) { + T* OldEnd = this->end(); + append( + std::move_iterator(this->end() - NumToInsert), + std::move_iterator(this->end())); + + // Copy the existing elements that get replaced. + std::move_backward(I, OldEnd - NumToInsert, OldEnd); + + // If we just moved the element we're inserting, be sure to update + // the reference (never happens if TakesParamByValue). + if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end()) + EltPtr += NumToInsert; + + std::fill_n(I, NumToInsert, *EltPtr); + return I; + } + + // Otherwise, we're inserting more elements than exist already, and we're + // not inserting at the end. + + // Move over the elements that we're about to overwrite. + T* OldEnd = this->end(); + this->set_size(this->size() + NumToInsert); + size_t NumOverwritten = OldEnd - I; + this->uninitialized_move(I, OldEnd, this->end() - NumOverwritten); + + // If we just moved the element we're inserting, be sure to update + // the reference (never happens if TakesParamByValue). + if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end()) + EltPtr += NumToInsert; + + // Replace the overwritten part. + std::fill_n(I, NumOverwritten, *EltPtr); + + // Insert the non-overwritten middle part. + std::uninitialized_fill_n(OldEnd, NumToInsert - NumOverwritten, *EltPtr); + return I; + } + + template < + typename ItTy, + typename = std::enable_if_t::iterator_category, + std::input_iterator_tag>>> + iterator insert(iterator I, ItTy From, ItTy To) { + // Convert iterator to elt# to avoid invalidating iterator when we reserve() + size_t InsertElt = I - this->begin(); + + if (I == this->end()) { // Important special case for empty vector. + append(From, To); + return this->begin() + InsertElt; + } + + assert( + this->isReferenceToStorage(I) && + "Insertion iterator is out of bounds."); + + // Check that the reserve that follows doesn't invalidate the iterators. + this->assertSafeToAddRange(From, To); + + size_t NumToInsert = std::distance(From, To); + + // Ensure there is enough space. + reserve(this->size() + NumToInsert); + + // Uninvalidate the iterator. + I = this->begin() + InsertElt; + + // If there are more elements between the insertion point and the end of the + // range than there are being inserted, we can use a simple approach to + // insertion. Since we already reserved space, we know that this won't + // reallocate the vector. + if (size_t(this->end() - I) >= NumToInsert) { + T* OldEnd = this->end(); + append( + std::move_iterator(this->end() - NumToInsert), + std::move_iterator(this->end())); + + // Copy the existing elements that get replaced. + std::move_backward(I, OldEnd - NumToInsert, OldEnd); + + std::copy(From, To, I); + return I; + } + + // Otherwise, we're inserting more elements than exist already, and we're + // not inserting at the end. + + // Move over the elements that we're about to overwrite. + T* OldEnd = this->end(); + this->set_size(this->size() + NumToInsert); + size_t NumOverwritten = OldEnd - I; + this->uninitialized_move(I, OldEnd, this->end() - NumOverwritten); + + // Replace the overwritten part. + for (T* J = I; NumOverwritten > 0; --NumOverwritten) { + *J = *From; + ++J; + ++From; + } + + // Insert the non-overwritten middle part. 
+ this->uninitialized_copy(From, To, OldEnd); + return I; + } + + void insert(iterator I, std::initializer_list IL) { + insert(I, IL.begin(), IL.end()); + } + + template + reference emplace_back(ArgTypes&&... Args) { + if (C10_UNLIKELY(this->size() >= this->capacity())) + return this->growAndEmplaceBack(std::forward(Args)...); + + ::new ((void*)this->end()) T(std::forward(Args)...); + this->set_size(this->size() + 1); + return this->back(); + } + + SmallVectorImpl& operator=(const SmallVectorImpl& RHS); + + SmallVectorImpl& operator=(SmallVectorImpl&& RHS) noexcept( + std::is_nothrow_move_constructible_v && + std::is_nothrow_destructible_v); + + bool operator==(const SmallVectorImpl& RHS) const { + if (this->size() != RHS.size()) + return false; + return std::equal(this->begin(), this->end(), RHS.begin()); + } + bool operator!=(const SmallVectorImpl& RHS) const { + return !(*this == RHS); + } + + bool operator<(const SmallVectorImpl& RHS) const { + return std::lexicographical_compare( + this->begin(), this->end(), RHS.begin(), RHS.end()); + } +}; + +template +void SmallVectorImpl::swap(SmallVectorImpl& RHS) noexcept { + if (this == &RHS) + return; + + // We can only avoid copying elements if neither vector is small. + if (!this->isSmall() && !RHS.isSmall()) { + std::swap(this->BeginX, RHS.BeginX); + std::swap(this->Size, RHS.Size); + std::swap(this->Capacity, RHS.Capacity); + return; + } + this->reserve(RHS.size()); + RHS.reserve(this->size()); + + // Swap the shared elements. + size_t NumShared = this->size(); + if (NumShared > RHS.size()) + NumShared = RHS.size(); + for (size_type i = 0; i != NumShared; ++i) + std::swap((*this)[i], RHS[i]); + + // Copy over the extra elts. + if (this->size() > RHS.size()) { + size_t EltDiff = this->size() - RHS.size(); + this->uninitialized_copy(this->begin() + NumShared, this->end(), RHS.end()); + RHS.set_size(RHS.size() + EltDiff); + this->destroy_range(this->begin() + NumShared, this->end()); + this->set_size(NumShared); + } else if (RHS.size() > this->size()) { + size_t EltDiff = RHS.size() - this->size(); + this->uninitialized_copy(RHS.begin() + NumShared, RHS.end(), this->end()); + this->set_size(this->size() + EltDiff); + this->destroy_range(RHS.begin() + NumShared, RHS.end()); + RHS.set_size(NumShared); + } +} + +template +SmallVectorImpl& SmallVectorImpl::operator=( + const SmallVectorImpl& RHS) { + // Avoid self-assignment. + if (this == &RHS) + return *this; + + // If we already have sufficient space, assign the common elements, then + // destroy any excess. + size_t RHSSize = RHS.size(); + size_t CurSize = this->size(); + if (CurSize >= RHSSize) { + // Assign common elements. + iterator NewEnd; + if (RHSSize) + NewEnd = std::copy(RHS.begin(), RHS.begin() + RHSSize, this->begin()); + else + NewEnd = this->begin(); + + // Destroy excess elements. + this->destroy_range(NewEnd, this->end()); + + // Trim. + this->set_size(RHSSize); + return *this; + } + + // If we have to grow to have enough elements, destroy the current elements. + // This allows us to avoid copying them during the grow. + // FIXME: don't do this if they're efficiently moveable. + if (this->capacity() < RHSSize) { + // Destroy current elements. + this->clear(); + CurSize = 0; + this->grow(RHSSize); + } else if (CurSize) { + // Otherwise, use assignment for the already-constructed elements. + std::copy(RHS.begin(), RHS.begin() + CurSize, this->begin()); + } + + // Copy construct the new elements in place. 
+ this->uninitialized_copy( + RHS.begin() + CurSize, RHS.end(), this->begin() + CurSize); + + // Set end. + this->set_size(RHSSize); + return *this; +} + +template +SmallVectorImpl& SmallVectorImpl:: +operator=(SmallVectorImpl&& RHS) noexcept( + std::is_nothrow_move_constructible_v && + std::is_nothrow_destructible_v) { + // Avoid self-assignment. + if (this == &RHS) + return *this; + + // If the RHS isn't small, clear this vector and then steal its buffer. + if (!RHS.isSmall()) { + this->destroy_range(this->begin(), this->end()); + if (!this->isSmall()) + free(this->begin()); + this->BeginX = RHS.BeginX; + this->Size = RHS.Size; + this->Capacity = RHS.Capacity; + RHS.resetToSmall(); + return *this; + } + + // If we already have sufficient space, assign the common elements, then + // destroy any excess. + size_t RHSSize = RHS.size(); + size_t CurSize = this->size(); + if (CurSize >= RHSSize) { + // Assign common elements. + iterator NewEnd = this->begin(); + if (RHSSize) + NewEnd = std::move(RHS.begin(), RHS.end(), NewEnd); + + // Destroy excess elements and trim the bounds. + this->destroy_range(NewEnd, this->end()); + this->set_size(RHSSize); + + // Clear the RHS. + RHS.clear(); + + return *this; + } + + // If we have to grow to have enough elements, destroy the current elements. + // This allows us to avoid copying them during the grow. + // FIXME: this may not actually make any sense if we can efficiently move + // elements. + if (this->capacity() < RHSSize) { + // Destroy current elements. + this->clear(); + CurSize = 0; + this->grow(RHSSize); + } else if (CurSize) { + // Otherwise, use assignment for the already-constructed elements. + std::move(RHS.begin(), RHS.begin() + CurSize, this->begin()); + } + + // Move-construct the new elements in place. + this->uninitialized_move( + RHS.begin() + CurSize, RHS.end(), this->begin() + CurSize); + + // Set end. + this->set_size(RHSSize); + + RHS.clear(); + return *this; +} + +/// Storage for the SmallVector elements. This is specialized for the N=0 case +/// to avoid allocating unnecessary storage. +template +struct SmallVectorStorage { + alignas(T) char InlineElts[N * sizeof(T)]; +}; + +/// We need the storage to be properly aligned even for small-size of 0 so that +/// the pointer math in \a SmallVectorTemplateCommon::getFirstEl() is +/// well-defined. +template +struct alignas(T) SmallVectorStorage {}; + +/// Forward declaration of SmallVector so that +/// calculateSmallVectorDefaultInlinedElements can reference +/// `sizeof(SmallVector)`. +template +class /* LLVM_GSL_OWNER */ SmallVector; + +/// Helper class for calculating the default number of inline elements for +/// `SmallVector`. +/// +/// This should be migrated to a constexpr function when our minimum +/// compiler support is enough for multi-statement constexpr functions. +template +struct CalculateSmallVectorDefaultInlinedElements { + // Parameter controlling the default number of inlined elements + // for `SmallVector`. + // + // The default number of inlined elements ensures that + // 1. There is at least one inlined element. + // 2. `sizeof(SmallVector) <= kPreferredSmallVectorSizeof` unless + // it contradicts 1. + static constexpr size_t kPreferredSmallVectorSizeof = 64; + + // static_assert that sizeof(T) is not "too big". + // + // Because our policy guarantees at least one inlined element, it is possible + // for an arbitrarily large inlined element to allocate an arbitrarily large + // amount of inline storage. 
We generally consider it an antipattern for a + // SmallVector to allocate an excessive amount of inline storage, so we want + // to call attention to these cases and make sure that users are making an + // intentional decision if they request a lot of inline storage. + // + // We want this assertion to trigger in pathological cases, but otherwise + // not be too easy to hit. To accomplish that, the cutoff is actually somewhat + // larger than kPreferredSmallVectorSizeof (otherwise, + // `SmallVector>` would be one easy way to trip it, and that + // pattern seems useful in practice). + // + // One wrinkle is that this assertion is in theory non-portable, since + // sizeof(T) is in general platform-dependent. However, we don't expect this + // to be much of an issue, because most LLVM development happens on 64-bit + // hosts, and therefore sizeof(T) is expected to *decrease* when compiled for + // 32-bit hosts, dodging the issue. The reverse situation, where development + // happens on a 32-bit host and then fails due to sizeof(T) *increasing* on a + // 64-bit host, is expected to be very rare. + static_assert( + sizeof(T) <= 256, + "You are trying to use a default number of inlined elements for " + "`SmallVector` but `sizeof(T)` is really big! Please use an " + "explicit number of inlined elements with `SmallVector` to make " + "sure you really want that much inline storage."); + + // Discount the size of the header itself when calculating the maximum inline + // bytes. + static constexpr size_t PreferredInlineBytes = + kPreferredSmallVectorSizeof - sizeof(SmallVector); + static constexpr size_t NumElementsThatFit = PreferredInlineBytes / sizeof(T); + static constexpr size_t value = + NumElementsThatFit == 0 ? 1 : NumElementsThatFit; +}; + +/// This is a 'vector' (really, a variable-sized array), optimized +/// for the case when the array is small. It contains some number of elements +/// in-place, which allows it to avoid heap allocation when the actual number of +/// elements is below that threshold. This allows normal "small" cases to be +/// fast without losing generality for large inputs. +/// +/// \note +/// In the absence of a well-motivated choice for the number of inlined +/// elements \p N, it is recommended to use \c SmallVector (that is, +/// omitting the \p N). This will choose a default number of inlined elements +/// reasonable for allocation on the stack (for example, trying to keep \c +/// sizeof(SmallVector) around 64 bytes). +/// +/// \warning This does not attempt to be exception safe. +/// +/// \see https://llvm.org/docs/ProgrammersManual.html#llvm-adt-smallvector-h +template < + typename T, + unsigned N = CalculateSmallVectorDefaultInlinedElements::value> +class /* LLVM_GSL_OWNER */ SmallVector : public SmallVectorImpl, + SmallVectorStorage { + public: + SmallVector() : SmallVectorImpl(N) {} + + ~SmallVector() { + // Destroy the constructed elements in the vector. + this->destroy_range(this->begin(), this->end()); + } + + explicit SmallVector(size_t Size, const T& Value = T()) + : SmallVectorImpl(N) { + this->assign(Size, Value); + } + + template < + typename ItTy, + typename = std::enable_if_t::iterator_category, + std::input_iterator_tag>>> + SmallVector(ItTy S, ItTy E) : SmallVectorImpl(N) { + this->append(S, E); + } + + // note: The enable_if restricts Container to types that have a .begin() and + // .end() that return valid input iterators. 
+ template < + typename Container, + std::enable_if_t< + std::is_convertible_v< + typename std::iterator_traits< + decltype(std::declval() + .begin())>::iterator_category, + std::input_iterator_tag> && + std::is_convertible_v< + typename std::iterator_traits< + decltype(std::declval() + .end())>::iterator_category, + std::input_iterator_tag>, + int> = 0> + explicit SmallVector(Container&& c) : SmallVectorImpl(N) { + this->append(c.begin(), c.end()); + } + + SmallVector(std::initializer_list IL) : SmallVectorImpl(N) { + this->assign(IL); + } + + SmallVector(const SmallVector& RHS) : SmallVectorImpl(N) { + if (!RHS.empty()) + SmallVectorImpl::operator=(RHS); + } + + SmallVector& operator=(const SmallVector& RHS) { + SmallVectorImpl::operator=(RHS); + return *this; + } + + SmallVector(SmallVector&& RHS) noexcept( + std::is_nothrow_move_assignable_v>) + : SmallVectorImpl(N) { + if (!RHS.empty()) + SmallVectorImpl::operator=(::std::move(RHS)); + } + + // note: The enable_if restricts Container to types that have a .begin() and + // .end() that return valid input iterators. + template < + typename Container, + std::enable_if_t< + std::is_convertible_v< + typename std::iterator_traits< + decltype(std::declval() + .begin())>::iterator_category, + std::input_iterator_tag> && + std::is_convertible_v< + typename std::iterator_traits< + decltype(std::declval() + .end())>::iterator_category, + std::input_iterator_tag>, + int> = 0> + SmallVector& operator=(const Container& RHS) { + this->assign(RHS.begin(), RHS.end()); + return *this; + } + + SmallVector(SmallVectorImpl&& RHS) noexcept( + std::is_nothrow_move_assignable_v>) + : SmallVectorImpl(N) { + if (!RHS.empty()) + SmallVectorImpl::operator=(::std::move(RHS)); + } + + SmallVector& operator=(SmallVector&& RHS) noexcept( + std::is_nothrow_move_assignable_v>) { + SmallVectorImpl::operator=(::std::move(RHS)); + return *this; + } + + SmallVector& operator=(SmallVectorImpl&& RHS) noexcept( + std::is_nothrow_move_constructible_v>) { + SmallVectorImpl::operator=(::std::move(RHS)); + return *this; + } + + // note: The enable_if restricts Container to types that have a .begin() and + // .end() that return valid input iterators. + template < + typename Container, + std::enable_if_t< + std::is_convertible_v< + typename std::iterator_traits< + decltype(std::declval() + .begin())>::iterator_category, + std::input_iterator_tag> && + std::is_convertible_v< + typename std::iterator_traits< + decltype(std::declval() + .end())>::iterator_category, + std::input_iterator_tag>, + int> = 0> + // NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward) + SmallVector& operator=(Container&& C) { + this->assign(C.begin(), C.end()); + return *this; + } + + SmallVector& operator=(std::initializer_list IL) { + this->assign(IL); + return *this; + } +}; + +template +inline size_t capacity_in_bytes(const SmallVector& X) { + return X.capacity_in_bytes(); +} + +template +std::ostream& operator<<(std::ostream& out, const SmallVector& list) { + int i = 0; + out << "["; + for (auto e : list) { + if (i++ > 0) + out << ", "; + out << e; + } + out << "]"; + return out; +} + +template +using ValueTypeFromRangeType = std::remove_const_t< + std::remove_reference_t()))>>; + +/// Given a range of type R, iterate the entire range and return a +/// SmallVector with elements of the vector. This is useful, for example, +/// when you want to iterate a range and then sort the results. 
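// As a brief usage sketch tying the pieces above together (the element values
// and the inline capacities of 4 and 8 are arbitrary illustrative choices, and
// <algorithm> is assumed to be included by the caller):
// ```
// c10::SmallVector<int, 4> v;          // up to 4 elements stored inline
// v.push_back(3);
// v.append({9, 1});
// v.insert(v.begin(), 7);              // v == {7, 3, 9, 1}
// auto sorted = c10::to_vector<8>(v);  // to_vector is defined just below
// std::sort(sorted.begin(), sorted.end());
// ```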
+template +// NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward) +SmallVector, Size> to_vector(R&& Range) { + return {std::begin(Range), std::end(Range)}; +} +template +SmallVector< + ValueTypeFromRangeType, + CalculateSmallVectorDefaultInlinedElements< + ValueTypeFromRangeType>::value> +// NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward) +to_vector(R&& Range) { + return {std::begin(Range), std::end(Range)}; +} + +} // end namespace c10 + +namespace std { + +/// Implement std::swap in terms of SmallVector swap. +template +inline void swap( + c10::SmallVectorImpl& LHS, + c10::SmallVectorImpl& RHS) noexcept { + LHS.swap(RHS); +} + +/// Implement std::swap in terms of SmallVector swap. +template +inline void swap( + c10::SmallVector& LHS, + c10::SmallVector& RHS) noexcept { + LHS.swap(RHS); +} + +} // end namespace std diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/StringUtil.h b/phivenv/Lib/site-packages/torch/include/c10/util/StringUtil.h new file mode 100644 index 0000000000000000000000000000000000000000..1ad04bff672292b79e297da35bb0ed164adc6888 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/StringUtil.h @@ -0,0 +1,262 @@ +#ifndef C10_UTIL_STRINGUTIL_H_ +#define C10_UTIL_STRINGUTIL_H_ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +C10_CLANG_DIAGNOSTIC_PUSH() +#if C10_CLANG_HAS_WARNING("-Wshorten-64-to-32") +C10_CLANG_DIAGNOSTIC_IGNORE("-Wshorten-64-to-32") +#endif + +namespace c10 { + +namespace detail { + +// Obtains the base name from a full path. +C10_API std::string StripBasename(const std::string& full_path); + +C10_API std::string ExcludeFileExtension(const std::string& full_path); + +struct CompileTimeEmptyString { + operator const std::string&() const { + static const std::string empty_string_literal; + return empty_string_literal; + } + operator const char*() const { + return ""; + } +}; + +template +struct CanonicalizeStrTypes { + using type = const T&; +}; + +template +// NOLINTNEXTLINE(*c-arrays*) +struct CanonicalizeStrTypes { + using type = const char*; +}; + +inline std::ostream& _str(std::ostream& ss) { + return ss; +} + +template +struct Streamable : std::false_type {}; + +template +struct Streamable() << T{})> + : std::true_type {}; + +template +inline std::ostream& _str(std::ostream& ss, const T& t) { + if constexpr (std::is_enum_v && !Streamable::value) { + // NOLINTNEXTLINE(modernize-type-traits) + return _str(ss, static_cast::type>(t)); + } else { + // NOLINTNEXTLINE(clang-analyzer-core.CallAndMessage) + ss << t; + return ss; + } +} + +template +inline std::ostream& _str(std::ostream& ss, const std::optional& t) { + if (t.has_value()) { + return _str(ss, t.value()); + } + ss << "std::nullopt"; + return ss; +} +// Overloads of _str for wide types; forces narrowing. +C10_API std::ostream& _str(std::ostream& ss, const wchar_t* wCStr); +C10_API std::ostream& _str(std::ostream& ss, const wchar_t& wChar); +C10_API std::ostream& _str(std::ostream& ss, const std::wstring& wString); + +template <> +inline std::ostream& _str( + std::ostream& ss, + const CompileTimeEmptyString&) { + return ss; +} + +template +inline std::ostream& _str(std::ostream& ss, const T& t, const Args&... args) { + return _str(_str(ss, t), args...); +} + +template +struct _str_wrapper final { + static std::string call(const Args&... args) { + std::ostringstream ss; + _str(ss, args...); + return ss.str(); + } +}; + +// Specializations for already-a-string types. 
+template <> +struct _str_wrapper final { + // return by reference to avoid the binary size of a string copy + static const std::string& call(const std::string& str) { + return str; + } +}; + +template <> +struct _str_wrapper final { + static const char* call(const char* str) { + return str; + } +}; + +// For c10::str() with an empty argument list (which is common in our assert +// macros), we don't want to pay the binary size for constructing and +// destructing a stringstream or even constructing a string. +template <> +struct _str_wrapper<> final { + static CompileTimeEmptyString call() { + return CompileTimeEmptyString(); + } +}; + +} // namespace detail + +// Convert a list of string-like arguments into a single string. +template +inline decltype(auto) str(const Args&... args) { + return detail::_str_wrapper< + typename detail::CanonicalizeStrTypes::type...>::call(args...); +} + +template +inline std::string Join(const std::string& delimiter, const Container& v) { + std::stringstream s; + int cnt = static_cast(v.size()) - 1; + for (auto i = v.begin(); i != v.end(); ++i, --cnt) { + s << (*i) << (cnt ? delimiter : ""); + } + return std::move(s).str(); +} + +// Replace all occurrences of "from" substring to "to" string. +// Returns number of replacements +size_t C10_API +ReplaceAll(std::string& s, std::string_view from, std::string_view to); + +/// Represents a location in source code (for debugging). +struct C10_API SourceLocation { + const char* function; + const char* file; + uint32_t line; +}; + +std::ostream& operator<<(std::ostream& out, const SourceLocation& loc); + +// unix isprint but insensitive to locale +inline bool isPrint(char s) { + return s > 0x1f && s < 0x7f; +} + +inline void printQuotedString(std::ostream& stmt, const std::string_view str) { + stmt << "\""; + for (auto s : str) { + switch (s) { + case '\\': + stmt << "\\\\"; + break; + case '\'': + stmt << "\\'"; + break; + case '\"': + stmt << "\\\""; + break; + case '\a': + stmt << "\\a"; + break; + case '\b': + stmt << "\\b"; + break; + case '\f': + stmt << "\\f"; + break; + case '\n': + stmt << "\\n"; + break; + case '\r': + stmt << "\\r"; + break; + case '\t': + stmt << "\\t"; + break; + case '\v': + stmt << "\\v"; + break; + default: + if (isPrint(s)) { + stmt << s; + } else { + // C++ io has stateful formatting settings. Messing with + // them is probably worse than doing this manually. + // NOLINTNEXTLINE(*c-arrays*) + char buf[4] = "000"; + // NOLINTNEXTLINE(*narrowing-conversions) + buf[2] += s % 8; + s /= 8; + // NOLINTNEXTLINE(*narrowing-conversions) + buf[1] += s % 8; + s /= 8; + // NOLINTNEXTLINE(*narrowing-conversions) + buf[0] += s; + stmt << "\\" << buf; + } + break; + } + } + stmt << "\""; +} + +template +std::optional tryToNumber(const char* symbol) = delete; +template +std::optional tryToNumber(const std::string& symbol) = delete; + +/* + * Convert a string to a 64 bit integer. Trailing whitespaces are not supported. + * Similarly, integer string with trailing characters like "123abc" will be + * rejected. + */ +template <> +C10_API std::optional tryToNumber(const char* symbol); +template <> +C10_API std::optional tryToNumber(const std::string& symbol); + +/* + * Convert a string to a double. Trailing whitespaces are not supported. + * Similarly, integer string with trailing characters like "123abc" will + * be rejected. 
+ */ +template <> +C10_API std::optional tryToNumber(const char* symbol); +template <> +C10_API std::optional tryToNumber(const std::string& symbol); + +C10_API std::vector split( + std::string_view target, + char delimiter); +} // namespace c10 + +C10_CLANG_DIAGNOSTIC_POP() + +#endif // C10_UTIL_STRINGUTIL_H_ diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Synchronized.h b/phivenv/Lib/site-packages/torch/include/c10/util/Synchronized.h new file mode 100644 index 0000000000000000000000000000000000000000..74b5ac7a4025ec1db9a5b110bb39ddd943a88bf3 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Synchronized.h @@ -0,0 +1,62 @@ +#pragma once + +#include + +namespace c10 { + +/** + * A very simple Synchronization class for error-free use of data + * in a multi-threaded context. See folly/docs/Synchronized.md for + * the inspiration of this class. + * + * Full URL: + * https://github.com/facebook/folly/blob/main/folly/docs/Synchronized.md + * + * This class implements a small subset of the generic functionality + * implemented by folly:Synchronized. Specifically, only withLock + * is implemented here since it's the smallest possible API that is + * able to cover a large surface area of functionality offered by + * folly::Synchronized. + */ +template +class Synchronized final { + mutable std::mutex mutex_; + T data_; + + public: + Synchronized() = default; + Synchronized(T const& data) : data_(data) {} + Synchronized(T&& data) : data_(std::move(data)) {} + + // Don't permit copy construction, move, assignment, or + // move assignment, since the underlying std::mutex + // isn't necessarily copyable/moveable. + Synchronized(Synchronized const&) = delete; + Synchronized(Synchronized&&) = delete; + Synchronized operator=(Synchronized const&) = delete; + Synchronized operator=(Synchronized&&) = delete; + ~Synchronized() = default; + + /** + * To use, call withLock with a callback that accepts T either + * by copy or by reference. Use the protected variable in the + * provided callback safely. + */ + template + auto withLock(CB&& cb) { + std::lock_guard guard(this->mutex_); + return std::forward(cb)(this->data_); + } + + /** + * To use, call withLock with a callback that accepts T either + * by copy or by const reference. Use the protected variable in + * the provided callback safely. + */ + template + auto withLock(CB&& cb) const { + std::lock_guard guard(this->mutex_); + return std::forward(cb)(this->data_); + } +}; +} // end namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/ThreadLocal.h b/phivenv/Lib/site-packages/torch/include/c10/util/ThreadLocal.h new file mode 100644 index 0000000000000000000000000000000000000000..aa5d1d8ff7f0b01b57fad6b30da5bd2d41761040 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/ThreadLocal.h @@ -0,0 +1,156 @@ +#pragma once + +#include + +/** + * Android versions with libgnustl incorrectly handle thread_local C++ + * qualifier with composite types. NDK up to r17 version is affected. + * + * (A fix landed on Jun 4 2018: + * https://android-review.googlesource.com/c/toolchain/gcc/+/683601) + * + * In such cases, use c10::ThreadLocal wrapper + * which is `pthread_*` based with smart pointer semantics. + * + * In addition, convenient macro C10_DEFINE_TLS_static is available. 
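// Stepping back to the c10::Synchronized<T> wrapper declared in Synchronized.h
// above, a minimal usage sketch (the guarded type and the lambda bodies are
// illustrative only):
// ```
// c10::Synchronized<std::vector<int>> shared;
// shared.withLock([](std::vector<int>& v) { v.push_back(42); });
// size_t n = shared.withLock([](std::vector<int>& v) { return v.size(); });
// ```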
+ * To define static TLS variable of type std::string, do the following + * ``` + * C10_DEFINE_TLS_static(std::string, str_tls_); + * /////// + * { + * *str_tls_ = "abc"; + * assert(str_tls_->length(), 3); + * } + * ``` + * + * (see c10/test/util/ThreadLocal_test.cpp for more examples) + */ +#if !defined(C10_PREFER_CUSTOM_THREAD_LOCAL_STORAGE) + +#if defined(C10_ANDROID) && defined(__GLIBCXX__) && __GLIBCXX__ < 20180604 +#define C10_PREFER_CUSTOM_THREAD_LOCAL_STORAGE +#endif // defined(C10_ANDROID) && defined(__GLIBCXX__) && __GLIBCXX__ < 20180604 + +#endif // !defined(C10_PREFER_CUSTOM_THREAD_LOCAL_STORAGE) + +#if defined(C10_PREFER_CUSTOM_THREAD_LOCAL_STORAGE) +#include +#include +#include +#include +namespace c10 { + +/** + * @brief Temporary thread_local C++ qualifier replacement for Android + * based on `pthread_*`. + * To be used with composite types that provide default ctor. + */ +template +class ThreadLocal { + public: + ThreadLocal() { + pthread_key_create( + &key_, [](void* buf) { delete static_cast(buf); }); + } + + ~ThreadLocal() { + if (void* current = pthread_getspecific(key_)) { + delete static_cast(current); + } + + pthread_key_delete(key_); + } + + ThreadLocal(const ThreadLocal&) = delete; + ThreadLocal& operator=(const ThreadLocal&) = delete; + + Type& get() { + if (void* current = pthread_getspecific(key_)) { + return *static_cast(current); + } + + std::unique_ptr ptr = std::make_unique(); + if (0 == pthread_setspecific(key_, ptr.get())) { + return *ptr.release(); + } + + int err = errno; + TORCH_INTERNAL_ASSERT(false, "pthread_setspecific() failed, errno = ", err); + } + + Type& operator*() { + return get(); + } + + Type* operator->() { + return &get(); + } + + private: + pthread_key_t key_; +}; + +} // namespace c10 + +#define C10_DEFINE_TLS_static(Type, Name) static ::c10::ThreadLocal Name + +#define C10_DECLARE_TLS_class_static(Class, Type, Name) \ + static ::c10::ThreadLocal Name + +#define C10_DEFINE_TLS_class_static(Class, Type, Name) \ + ::c10::ThreadLocal Class::Name + +#else // defined(C10_PREFER_CUSTOM_THREAD_LOCAL_STORAGE) + +namespace c10 { + +/** + * @brief Default thread_local implementation for non-Android cases. + * To be used with composite types that provide default ctor. 
+ */ +template +class ThreadLocal { + public: + using Accessor = Type* (*)(); + explicit ThreadLocal(Accessor accessor) : accessor_(accessor) {} + + ThreadLocal(const ThreadLocal&) = delete; + ThreadLocal(ThreadLocal&&) noexcept = default; + ThreadLocal& operator=(const ThreadLocal&) = delete; + ThreadLocal& operator=(ThreadLocal&&) noexcept = default; + ~ThreadLocal() = default; + + Type& get() { + return *accessor_(); + } + + Type& operator*() { + return get(); + } + + Type* operator->() { + return &get(); + } + + private: + Accessor accessor_; +}; + +} // namespace c10 + +#define C10_DEFINE_TLS_static(Type, Name) \ + static ::c10::ThreadLocal Name([]() { \ + static thread_local Type var; \ + return &var; \ + }) + +#define C10_DECLARE_TLS_class_static(Class, Type, Name) \ + static ::c10::ThreadLocal Name + +#define C10_DEFINE_TLS_class_static(Class, Type, Name) \ + ::c10::ThreadLocal Class::Name([]() { \ + static thread_local Type var; \ + return &var; \ + }) + +#endif // defined(C10_PREFER_CUSTOM_THREAD_LOCAL_STORAGE) diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/ThreadLocalDebugInfo.h b/phivenv/Lib/site-packages/torch/include/c10/util/ThreadLocalDebugInfo.h new file mode 100644 index 0000000000000000000000000000000000000000..f3055c24ca6cc0942f2c16ac9dad5e2ebad17436 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/ThreadLocalDebugInfo.h @@ -0,0 +1,85 @@ +#pragma once + +#include + +#include +#include + +namespace c10 { + +enum class C10_API_ENUM DebugInfoKind : uint8_t { + PRODUCER_INFO = 0, + MOBILE_RUNTIME_INFO, + PROFILER_STATE, + INFERENCE_CONTEXT, // for inference usage + PARAM_COMMS_INFO, + + TEST_INFO, // used only in tests + TEST_INFO_2, // used only in tests +}; + +class C10_API DebugInfoBase { + public: + DebugInfoBase() = default; + virtual ~DebugInfoBase() = default; +}; + +// Thread local debug information is propagated across the forward +// (including async fork tasks) and backward passes and is supposed +// to be utilized by the user's code to pass extra information from +// the higher layers (e.g. model id) down to the lower levels +// (e.g. to the operator observers used for debugging, logging, +// profiling, etc) +class C10_API ThreadLocalDebugInfo { + public: + static DebugInfoBase* get(DebugInfoKind kind); + + // Get current ThreadLocalDebugInfo + static std::shared_ptr current(); + + // Internal, use DebugInfoGuard/ThreadLocalStateGuard + static void _forceCurrentDebugInfo( + std::shared_ptr info); + + // Push debug info struct of a given kind + static void _push(DebugInfoKind kind, std::shared_ptr info); + // Pop debug info, throws in case the last pushed + // debug info is not of a given kind + static std::shared_ptr _pop(DebugInfoKind kind); + // Peek debug info, throws in case the last pushed debug info is not of the + // given kind + static std::shared_ptr _peek(DebugInfoKind kind); + + private: + std::shared_ptr info_; + DebugInfoKind kind_; + std::shared_ptr parent_info_; + + friend class DebugInfoGuard; +}; + +// DebugInfoGuard is used to set debug information, +// ThreadLocalDebugInfo is semantically immutable, the values are set +// through the scope-based guard object. +// Nested DebugInfoGuard adds/overrides existing values in the scope, +// restoring the original values after exiting the scope. 
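// As a rough illustration of the guard pattern described here (ProducerInfo is
// a hypothetical DebugInfoBase subclass used only for this sketch; any
// subclass works):
// ```
// struct ProducerInfo : c10::DebugInfoBase {
//   std::string model_id;
// };
//
// {
//   auto info = std::make_shared<ProducerInfo>();
//   info->model_id = "my_model";
//   c10::DebugInfoGuard guard(c10::DebugInfoKind::PRODUCER_INFO, info);
//   // Lower layers (e.g. operator observers) can read it back while in scope:
//   auto* raw = c10::ThreadLocalDebugInfo::get(c10::DebugInfoKind::PRODUCER_INFO);
// }  // the previous debug info is restored when the guard is destroyed
// ```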
+// Users can access the values through the ThreadLocalDebugInfo::get() call; +class C10_API DebugInfoGuard { + public: + DebugInfoGuard(DebugInfoKind kind, std::shared_ptr info); + + explicit DebugInfoGuard(std::shared_ptr info); + + ~DebugInfoGuard(); + + DebugInfoGuard(const DebugInfoGuard&) = delete; + DebugInfoGuard(DebugInfoGuard&&) = delete; + DebugInfoGuard& operator=(const DebugInfoGuard&) = delete; + DebugInfoGuard& operator=(DebugInfoGuard&&) = delete; + + private: + bool active_ = false; + std::shared_ptr prev_info_ = nullptr; +}; + +} // namespace c10 diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/Type.h b/phivenv/Lib/site-packages/torch/include/c10/util/Type.h new file mode 100644 index 0000000000000000000000000000000000000000..f2d73853bc3a6a5a573fc7710b362d75c6496d10 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/Type.h @@ -0,0 +1,30 @@ +#ifndef C10_UTIL_TYPE_H_ +#define C10_UTIL_TYPE_H_ + +#include +#include +#ifdef __GXX_RTTI +#include +#endif // __GXX_RTTI + +#include + +namespace c10 { + +/// Utility to demangle a C++ symbol name. +C10_API std::string demangle(const char* name); + +/// Returns the printable name of the type. +template +inline const char* demangle_type() { +#ifdef __GXX_RTTI + static const auto& name = *(new std::string(demangle(typeid(T).name()))); + return name.c_str(); +#else // __GXX_RTTI + return "(RTTI disabled, cannot show name)"; +#endif // __GXX_RTTI +} + +} // namespace c10 + +#endif // C10_UTIL_TYPE_H_ diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/TypeCast.h b/phivenv/Lib/site-packages/torch/include/c10/util/TypeCast.h new file mode 100644 index 0000000000000000000000000000000000000000..74633b81522bf7d22cbcc86c6121ad613a346c10 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/TypeCast.h @@ -0,0 +1,210 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +C10_CLANG_DIAGNOSTIC_PUSH() +#if C10_CLANG_HAS_WARNING("-Wimplicit-float-conversion") +C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-float-conversion") +#endif +#if C10_CLANG_HAS_WARNING("-Wimplicit-int-float-conversion") +C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion") +#endif + +namespace c10 { + +template +struct needs_real { + constexpr static bool value = + (is_complex::value && !is_complex::value); +}; + +template +struct maybe_real { + C10_HOST_DEVICE static inline src_t apply(src_t src) { + return src; + } +}; + +template +struct maybe_real { + C10_HOST_DEVICE static inline decltype(auto) apply(src_t src) { + return src.real(); + } +}; + +template +struct maybe_bool { + C10_HOST_DEVICE static inline src_t apply(src_t src) { + return src; + } +}; + +template +struct maybe_bool { + C10_HOST_DEVICE static inline decltype(auto) apply(src_t src) { + // Don't use bool operator so as to to also compile for ComplexHalf. + return src.real() || src.imag(); + } +}; + +// Note: deliberately ignores undefined behavior, consistent with NumPy. +// PyTorch's type conversions can cause a variety of undefined behavior, +// including float to integral overflow and signed to unsigned integer overflow. +// Some of this undefined behavior is addressed below. 
+template <typename dest_t, typename src_t>
+struct static_cast_with_inter_type {
+  C10_HOST_DEVICE __ubsan_ignore_undefined__ static inline dest_t apply(
+      src_t src) {
+    constexpr bool real = needs_real<dest_t, src_t>::value;
+    auto r = maybe_real<real, src_t>::apply(src);
+    return static_cast<dest_t>(r);
+  }
+};
+
+// Partial template specialization for casting to bool.
+// Need to handle complex types separately, as we don't
+// simply want to cast the real part to bool.
+template <typename src_t>
+struct static_cast_with_inter_type<bool, src_t> {
+  C10_HOST_DEVICE static inline bool apply(src_t src) {
+    constexpr bool complex = needs_real<bool, src_t>::value;
+    return static_cast<bool>(maybe_bool<complex, src_t>::apply(src));
+  }
+};
+
+// Partial template instantiation for casting to uint8.
+// Note: Converting from negative float values to unsigned integer types is
+// undefined behavior in C++, and current CPU and GPU compilers exhibit
+// divergent behavior. Casting from negative float values to signed
+// integer types and then to unsigned integer types is not undefined,
+// however, so this cast improves the consistency of type conversions
+// to uint8 across compilers.
+// Further note: Type conversions across compilers still have other undefined
+// and divergent behavior.
+template <typename src_t>
+struct static_cast_with_inter_type<uint8_t, src_t> {
+  C10_HOST_DEVICE __ubsan_ignore_undefined__ static inline uint8_t apply(
+      src_t src) {
+    constexpr bool real = needs_real<uint8_t, src_t>::value;
+    return static_cast<uint8_t>(
+        static_cast<int64_t>(maybe_real<real, src_t>::apply(src)));
+  }
+};
+
+template <>
+struct static_cast_with_inter_type<c10::complex<c10::Half>, c10::BFloat16> {
+  C10_HOST_DEVICE __ubsan_ignore_undefined__ static inline c10::complex<
+      c10::Half>
+  apply(c10::BFloat16 src) {
+    return static_cast<c10::complex<c10::Half>>(c10::complex<float>{src});
+  }
+};
+
+template <>
+struct static_cast_with_inter_type<c10::complex<c10::Half>, c10::Float8_e5m2> {
+  C10_HOST_DEVICE __ubsan_ignore_undefined__ static inline c10::complex<
+      c10::Half>
+  apply(c10::Float8_e5m2 src) {
+    return static_cast<c10::complex<c10::Half>>(c10::complex<float>{src});
+  }
+};
+
+template <>
+struct static_cast_with_inter_type<
+    c10::complex<c10::Half>,
+    c10::Float8_e5m2fnuz> {
+  C10_HOST_DEVICE __ubsan_ignore_undefined__ static inline c10::complex<
+      c10::Half>
+  apply(c10::Float8_e5m2fnuz src) {
+    return static_cast<c10::complex<c10::Half>>(c10::complex<float>{src});
+  }
+};
+
+template <>
+struct static_cast_with_inter_type<
+    c10::complex<c10::Half>,
+    c10::Float8_e4m3fn> {
+  C10_HOST_DEVICE __ubsan_ignore_undefined__ static inline c10::complex<
+      c10::Half>
+  apply(c10::Float8_e4m3fn src) {
+    return static_cast<c10::complex<c10::Half>>(c10::complex<float>{src});
+  }
+};
+
+template <>
+struct static_cast_with_inter_type<
+    c10::complex<c10::Half>,
+    c10::Float8_e4m3fnuz> {
+  C10_HOST_DEVICE __ubsan_ignore_undefined__ static inline c10::complex<
+      c10::Half>
+  apply(c10::Float8_e4m3fnuz src) {
+    return static_cast<c10::complex<c10::Half>>(c10::complex<float>{src});
+  }
+};
+
+// TODO(#146647): Can we make all these template specialization happen
+// based off our apply macros?
+template <> +struct static_cast_with_inter_type< + c10::complex, + c10::Float8_e8m0fnu> { + C10_HOST_DEVICE __ubsan_ignore_undefined__ static inline c10::complex< + c10::Half> + apply(c10::Float8_e8m0fnu src) { + return static_cast>(c10::complex{src}); + } +}; + +template <> +struct static_cast_with_inter_type, c10::Half> { + C10_HOST_DEVICE __ubsan_ignore_undefined__ static inline c10::complex< + c10::Half> + apply(c10::Half src) { + return static_cast>(c10::complex{src}); + } +}; + +template <> +struct static_cast_with_inter_type< + c10::complex, + c10::complex> { + C10_HOST_DEVICE __ubsan_ignore_undefined__ static inline c10::complex< + c10::Half> + apply(c10::complex src) { + return static_cast>( + static_cast>(src)); + } +}; + +template +C10_HOST_DEVICE To convert(From f) { + return static_cast_with_inter_type::apply(f); +} + +// Define separately to avoid being inlined and prevent code-size bloat +[[noreturn]] C10_API void report_overflow(const char* name); + +template +To checked_convert(From f, const char* name) { + // Converting to bool can't overflow so we exclude this case from checking. + if (!std::is_same_v && overflows(f)) { + report_overflow(name); + } + return convert(f); +} + +} // namespace c10 + +C10_CLANG_DIAGNOSTIC_POP() + +// Trigger tests for D25440771. TODO: Remove this line any time you want. diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/TypeIndex.h b/phivenv/Lib/site-packages/torch/include/c10/util/TypeIndex.h new file mode 100644 index 0000000000000000000000000000000000000000..3f988594aaeae40c3259de5a9b31042421d40fc8 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/TypeIndex.h @@ -0,0 +1,127 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#if !defined(FBCODE_CAFFE2) && !defined(C10_NODEPRECATED) +#define C10_TYPENAME_SUPPORTS_CONSTEXPR 1 +#define C10_TYPENAME_CONSTEXPR constexpr +#endif + +namespace c10::util { + +struct type_index final : IdWrapper { + constexpr explicit type_index(uint64_t checksum) : IdWrapper(checksum) {} + + // Allow usage in std::map / std::set + // TODO Disallow this and rather use std::unordered_map/set everywhere + friend constexpr bool operator<(type_index lhs, type_index rhs) noexcept { + return lhs.underlyingId() < rhs.underlyingId(); + } + + friend std::ostream& operator<<(std::ostream& stream, type_index typeId) { + return stream << typeId.underlyingId(); + } +}; + +namespace detail { + +template +inline constexpr c10::c10_string_view fully_qualified_type_name_impl() { +#if defined(_MSC_VER) && !defined(__clang__) + constexpr std::string_view fun_sig = __FUNCSIG__; +#if defined(__NVCC__) + constexpr std::string_view prefix = + "c10::basic_string_view c10::util::detail::fully_qualified_type_name_impl<"; + constexpr std::string_view suffix = ">()"; +#else + constexpr std::string_view prefix = + "class c10::basic_string_view __cdecl c10::util::detail::fully_qualified_type_name_impl<"; + constexpr std::string_view suffix = ">(void)"; +#endif +#elif defined(__clang__) + constexpr std::string_view fun_sig = __PRETTY_FUNCTION__; + constexpr std::string_view prefix = + "c10::c10_string_view c10::util::detail::fully_qualified_type_name_impl() [T = "; + constexpr std::string_view suffix = "]"; +#elif defined(__GNUC__) + constexpr std::string_view fun_sig = __PRETTY_FUNCTION__; + constexpr std::string_view prefix = + "constexpr c10::c10_string_view c10::util::detail::fully_qualified_type_name_impl() [with T = "; + constexpr std::string_view suffix = + "; 
c10::c10_string_view = c10::basic_string_view]"; +#endif +#if !defined(__CUDA_ARCH__) && !defined(__CUDA_ARCH_LIST__) + static_assert(c10::starts_with( + static_cast(fun_sig), + static_cast(prefix))); + static_assert(c10::ends_with( + static_cast(fun_sig), + static_cast(suffix))); +#endif + return fun_sig.substr( + prefix.size(), fun_sig.size() - prefix.size() - suffix.size()); +} + +#if !defined(__CUDA_ARCH__) && !defined(__CUDA_ARCH_LIST__) +template +inline constexpr uint64_t type_index_impl() { +// Idea: __PRETTY_FUNCTION__ (or __FUNCSIG__ on msvc) contains a qualified name +// of this function, including its template parameter, i.e. including the +// type we want an id for. We use this name and run crc64 on it to get a type +// id. +#if defined(_MSC_VER) && !defined(__clang__) + return crc64(__FUNCSIG__, sizeof(__FUNCSIG__)).checksum(); +#elif defined(__clang__) + return crc64(__PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__)).checksum(); +#elif defined(__GNUC__) + return crc64(__PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__)).checksum(); +#endif +} +#endif + +} // namespace detail + +template +inline constexpr type_index get_type_index() { +#if !defined(__CUDA_ARCH__) && !defined(__CUDA_ARCH_LIST__) + // To enforce that this is really computed at compile time, we pass the + // type index through std::integral_constant. + return type_index{std::integral_constant< + uint64_t, + detail::type_index_impl>()>::value}; +#else + // There's nothing in theory preventing us from running this on device code + // except for nvcc throwing a compiler error if we enable it. + return (abort(), type_index(0)); +#endif +} + +#if !defined(TORCH_PEDANTIC) +// Use precomputed hashsum for std::string +// Needed to workaround ambiguity in class name resolution +// into __PRETTY_FUNCTION__ when abovementioned class is defined in inlined +// namespace. In multi-ABI C++ library, `std::string` is an alias to +// `std::__cxx11::basic_string` which depending on compiler flags can be +// resolved to `basic_string` either in `std` namespace or in +// `std::__cxx11` one (`__cxx11` is an inline namespace) +template <> +inline constexpr type_index get_type_index() { + // hashsum for std::basic_string + return type_index{4193213214807308375ULL}; +} +#endif + +template +inline constexpr std::string_view get_fully_qualified_type_name() noexcept { + return static_cast( + detail::fully_qualified_type_name_impl()); +} +} // namespace c10::util + +C10_DEFINE_HASH_FOR_IDWRAPPER(c10::util::type_index) diff --git a/phivenv/Lib/site-packages/torch/include/c10/util/TypeList.h b/phivenv/Lib/site-packages/torch/include/c10/util/TypeList.h new file mode 100644 index 0000000000000000000000000000000000000000..438477a73518f24a5a978fe164a1a8dcdcffd721 --- /dev/null +++ b/phivenv/Lib/site-packages/torch/include/c10/util/TypeList.h @@ -0,0 +1,515 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace c10::guts { + +template +struct false_t : std::false_type {}; +template
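// A short usage sketch for the TypeIndex utilities above (the sample type is
// arbitrary; both helpers are constexpr on supported host compilers):
// ```
// constexpr auto idx = c10::util::get_type_index<std::vector<int>>();
// constexpr std::string_view name =
//     c10::util::get_fully_qualified_type_name<std::vector<int>>();
// // idx is a stable 64-bit id suitable for use as a std::map key or with
// // std::hash; name holds the fully qualified type name.
// ```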