|
|
#pragma once
|
|
|
|
|
|
#include <c10/cuda/CUDADeviceAssertionHost.h>
|
|
|
#include <c10/cuda/CUDAMacros.h>
|
|
|
#include <c10/cuda/CUDAMiscFunctions.h>
|
|
|
#include <c10/macros/Macros.h>
|
|
|
#include <c10/util/Exception.h>
|
|
|
#include <c10/util/irange.h>
|
|
|
#include <cuda.h>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
namespace c10 {
|
|
|
class C10_CUDA_API CUDAError : public c10::Error {
|
|
|
using Error::Error;
|
|
|
};
|
|
|
}
|
|
|
|
|
|
#define C10_CUDA_CHECK(EXPR) \
|
|
|
do { \
|
|
|
const cudaError_t __err = EXPR; \
|
|
|
c10::cuda::c10_cuda_check_implementation( \
|
|
|
static_cast<int32_t>(__err), \
|
|
|
__FILE__, \
|
|
|
__func__, |
|
|
\
|
|
|
static_cast<uint32_t>(__LINE__), \
|
|
|
true); \
|
|
|
} while (0)
|
|
|
|
|
|
#define C10_CUDA_CHECK_WARN(EXPR) \
|
|
|
do { \
|
|
|
const cudaError_t __err = EXPR; \
|
|
|
if (C10_UNLIKELY(__err != cudaSuccess)) { \
|
|
|
[[maybe_unused]] auto error_unused = cudaGetLastError(); \
|
|
|
TORCH_WARN("CUDA warning: ", cudaGetErrorString(__err)); \
|
|
|
} \
|
|
|
} while (0)
|
|
|
|
|
|
|
|
|
#define C10_CUDA_ERROR_HANDLED(EXPR) EXPR
|
|
|
|
|
|
|
|
|
#define C10_CUDA_IGNORE_ERROR(EXPR) \
|
|
|
do { \
|
|
|
const cudaError_t __err = EXPR; \
|
|
|
if (C10_UNLIKELY(__err != cudaSuccess)) { \
|
|
|
[[maybe_unused]] cudaError_t error_unused = cudaGetLastError(); \
|
|
|
} \
|
|
|
} while (0)
|
|
|
|
|
|
|
|
|
#define C10_CUDA_CLEAR_ERROR() \
|
|
|
do { \
|
|
|
[[maybe_unused]] cudaError_t error_unused = cudaGetLastError(); \
|
|
|
} while (0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define C10_CUDA_KERNEL_LAUNCH_CHECK() C10_CUDA_CHECK(cudaGetLastError())
|
|
|
|
|
|
|
|
|
|
|
|
#define TORCH_DSA_KERNEL_LAUNCH( \
|
|
|
kernel, blocks, threads, shared_mem, stream, ...) \
|
|
|
do { \
|
|
|
auto& launch_registry = \
|
|
|
c10::cuda::CUDAKernelLaunchRegistry::get_singleton_ref(); \
|
|
|
kernel<<<blocks, threads, shared_mem, stream>>>( \
|
|
|
__VA_ARGS__, \
|
|
|
launch_registry.get_uvm_assertions_ptr_for_current_device(), \
|
|
|
launch_registry.insert( \
|
|
|
__FILE__, __FUNCTION__, __LINE__, #kernel, stream.id())); \
|
|
|
C10_CUDA_KERNEL_LAUNCH_CHECK(); \
|
|
|
} while (0)
|
|
|
|
|
|
namespace c10::cuda {
|
|
|
|
|
|
|
|
|
|
|
|
C10_CUDA_API void c10_cuda_check_implementation(
|
|
|
const int32_t err,
|
|
|
const char* filename,
|
|
|
const char* function_name,
|
|
|
const int line_number,
|
|
|
const bool include_device_assertions);
|
|
|
|
|
|
}
|
|
|
|