/** Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved. * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation * and any modifications thereto. Any use, reproduction, disclosure or * distribution of this software and related documentation without an express * license agreement from NVIDIA CORPORATION is strictly prohibited. */ #pragma once #include "builtin.h" #if WP_ENABLE_CUDA #include #include #include #define check_cuda(code) (check_cuda_result(code, __FILE__, __LINE__)) #define check_cu(code) (check_cu_result(code, __FILE__, __LINE__)) #if defined(__CUDACC__) #if _DEBUG // helper for launching kernels (synchronize + error checking after each kernel) #define wp_launch_device(context, kernel, dim, args) { \ if (dim) { \ ContextGuard guard(context); \ const int num_threads = 256; \ const int num_blocks = (dim+num_threads-1)/num_threads; \ kernel<<>>args; \ check_cuda(cuda_context_check(WP_CURRENT_CONTEXT)); } } #else // helper for launching kernels (no error checking) #define wp_launch_device(context, kernel, dim, args) { \ if (dim) { \ ContextGuard guard(context); \ const int num_threads = 256; \ const int num_blocks = (dim+num_threads-1)/num_threads; \ kernel<<>>args; } } #endif // _DEBUG #endif // defined(__CUDACC__) CUresult cuDriverGetVersion_f(int* version); CUresult cuGetErrorName_f(CUresult result, const char** pstr); CUresult cuGetErrorString_f(CUresult result, const char** pstr); CUresult cuInit_f(unsigned int flags); CUresult cuDeviceGet_f(CUdevice *dev, int ordinal); CUresult cuDeviceGetCount_f(int* count); CUresult cuDeviceGetName_f(char* name, int len, CUdevice dev); CUresult cuDeviceGetAttribute_f(int* value, CUdevice_attribute attrib, CUdevice dev); CUresult cuDevicePrimaryCtxRetain_f(CUcontext* ctx, CUdevice dev); CUresult cuDevicePrimaryCtxRelease_f(CUdevice dev); CUresult cuDeviceCanAccessPeer_f(int* can_access, CUdevice dev, CUdevice peer_dev); CUresult cuCtxGetCurrent_f(CUcontext* ctx); CUresult cuCtxSetCurrent_f(CUcontext ctx); CUresult cuCtxPushCurrent_f(CUcontext ctx); CUresult cuCtxPopCurrent_f(CUcontext* ctx); CUresult cuCtxSynchronize_f(); CUresult cuCtxGetDevice_f(CUdevice* dev); CUresult cuCtxCreate_f(CUcontext* ctx, unsigned int flags, CUdevice dev); CUresult cuCtxDestroy_f(CUcontext ctx); CUresult cuCtxEnablePeerAccess_f(CUcontext peer_ctx, unsigned int flags); CUresult cuStreamCreate_f(CUstream* stream, unsigned int flags); CUresult cuStreamDestroy_f(CUstream stream); CUresult cuStreamSynchronize_f(CUstream stream); CUresult cuStreamWaitEvent_f(CUstream stream, CUevent event, unsigned int flags); CUresult cuEventCreate_f(CUevent* event, unsigned int flags); CUresult cuEventDestroy_f(CUevent event); CUresult cuEventRecord_f(CUevent event, CUstream stream); CUresult cuModuleUnload_f(CUmodule hmod); CUresult cuModuleLoadDataEx_f(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues); CUresult cuModuleGetFunction_f(CUfunction *hfunc, CUmodule hmod, const char *name); CUresult cuLaunchKernel_f(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra); CUresult cuMemcpyPeerAsync_f(CUdeviceptr dst_ptr, CUcontext dst_ctx, CUdeviceptr src_ptr, CUcontext src_ctx, size_t n, CUstream stream); CUresult cuGraphicsMapResources_f(unsigned int count, CUgraphicsResource* resources, CUstream stream); CUresult cuGraphicsUnmapResources_f(unsigned int count, CUgraphicsResource* resources, CUstream hStream); CUresult cuGraphicsResourceGetMappedPointer_f(CUdeviceptr* pDevPtr, size_t* pSize, CUgraphicsResource resource); CUresult cuGraphicsGLRegisterBuffer_f(CUgraphicsResource *pCudaResource, unsigned int buffer, unsigned int flags); CUresult cuGraphicsUnregisterResource_f(CUgraphicsResource resource); bool init_cuda_driver(); bool is_cuda_driver_initialized(); bool check_cuda_result(cudaError_t code, const char* file, int line); inline bool check_cuda_result(uint64_t code, const char* file, int line) { return check_cuda_result(static_cast(code), file, line); } bool check_cu_result(CUresult result, const char* file, int line); // // Scoped CUDA context guard // // Behaviour on entry // - If the given `context` is NULL, do nothing. // - If the given `context` is the same as the current context, do nothing. // - If the given `context` is different from the current context, make the given context current. // // Behaviour on exit // - If the current context did not change on entry, do nothing. // - If the `restore` flag was true on entry, make the previous context current. // // Default exit behaviour policy // - If the `restore` flag is omitted on entry, fall back on the global `always_restore` flag. // - This allows us to easily change the default behaviour of the guards. // class ContextGuard { public: // default policy for restoring contexts static bool always_restore; explicit ContextGuard(CUcontext context, bool restore=always_restore) : need_restore(false) { if (context) { if (check_cu(cuCtxGetCurrent_f(&prev_context)) && context != prev_context) need_restore = check_cu(cuCtxSetCurrent_f(context)) && restore; } } explicit ContextGuard(void* context, bool restore=always_restore) : ContextGuard(static_cast(context), restore) { } ~ContextGuard() { if (need_restore) check_cu(cuCtxSetCurrent_f(prev_context)); } private: CUcontext prev_context; bool need_restore; }; #else typedef int CUdevice; typedef struct CUctx_st* CUcontext; typedef struct CUstream_st* CUstream; class ContextGuard { public: explicit ContextGuard(CUcontext context, bool restore=false) { (void)context; (void)restore; } explicit ContextGuard(void* context, bool restore=false) { (void)context; (void)restore; } }; #endif // WP_ENABLE_CUDA // Pass this value to device functions as the `context` parameter to bypass unnecessary context management. // This works in conjuntion with ContextGuards, which do nothing if the given context is NULL. // Using this variable instead of passing NULL directly aids readability and makes the intent clear. constexpr void* WP_CURRENT_CONTEXT = NULL;