Spaces:

qbhf2
/

GarmentCode

Sleeping

App Files Files Community

GarmentCode / NvidiaWarp-GarmentCode /warp /native /cuda_util.h

qbhf2

added NvidiaWarp and GarmentCode repos

66c9c8a 11 months ago

raw

history blame contribute delete

6.83 kB

	/** Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
	* NVIDIA CORPORATION and its licensors retain all intellectual property
	* and proprietary rights in and to this software, related documentation
	* and any modifications thereto. Any use, reproduction, disclosure or
	* distribution of this software and related documentation without an express
	* license agreement from NVIDIA CORPORATION is strictly prohibited.
	*/

	#pragma once

	#include "builtin.h"

	#if WP_ENABLE_CUDA

	#include <cudaTypedefs.h>
	#include <cuda_runtime_api.h>

	#include <stdio.h>

	// Wraps an expression yielding a CUDA runtime result: forwards it, together with the
	// call site's file and line, to check_cuda_result() and evaluates to that function's result.
	#define check_cuda(code) (check_cuda_result((code), __FILE__, __LINE__))
	// Same as check_cuda(), but for CUDA driver API results (forwards to check_cu_result()).
	#define check_cu(code) (check_cu_result((code), __FILE__, __LINE__))


	#if defined(__CUDACC__)
	//
	// wp_launch_device(context, kernel, dim, args)
	//
	// Helper for launching kernels; only available when compiling with a CUDA compiler.
	//
	// - context: passed to a ContextGuard that lives for the duration of the launch
	// - kernel:  the __global__ function to launch
	// - dim:     number of threads requested; nothing is launched when it is zero
	// - args:    parenthesised kernel argument list, e.g. (a, b, c)
	//
	// The launch uses 1D blocks of `num_threads` threads and enough blocks to cover `dim`
	// (rounded up), on the stream returned by cuda_stream_get_current().
	//
	// Notes
	// - `dim` is expanded twice (zero test and block count), so pass a side-effect-free expression.
	// - `dim` is parenthesised inside the block-count arithmetic so that compound expressions
	//   (e.g. `a ? b : c`) are not re-associated with the rounding terms.
	// - The locals `guard`, `num_threads` and `num_blocks` are visible while `args` is expanded.
	//
	#if _DEBUG
	// debug variant: additionally runs check_cuda(cuda_context_check(...)) after each kernel
	// (synchronize + error checking after each kernel)
	#define wp_launch_device(context, kernel, dim, args) { \
	    if (dim) { \
	        ContextGuard guard(context); \
	        const int num_threads = 256; \
	        const int num_blocks = ((dim)+num_threads-1)/num_threads; \
	        kernel<<<num_blocks, num_threads, 0, (cudaStream_t)cuda_stream_get_current()>>>args; \
	        check_cuda(cuda_context_check(WP_CURRENT_CONTEXT)); } }
	#else
	// release variant: launch only (no error checking)
	#define wp_launch_device(context, kernel, dim, args) { \
	    if (dim) { \
	        ContextGuard guard(context); \
	        const int num_threads = 256; \
	        const int num_blocks = ((dim)+num_threads-1)/num_threads; \
	        kernel<<<num_blocks, num_threads, 0, (cudaStream_t)cuda_stream_get_current()>>>args; } }
	#endif // _DEBUG
	#endif // defined(__CUDACC__)


	// Declarations of the `_f`-suffixed wrappers around CUDA driver API entry points.
	// Each wrapper takes the same parameters as the driver function it is named after
	// (without the `_f` suffix) and returns its CUresult; the definitions live outside this header.
	CUresult cuDriverGetVersion_f(int* version);
	CUresult cuGetErrorName_f(CUresult result, const char** pstr);
	CUresult cuGetErrorString_f(CUresult result, const char** pstr);
	CUresult cuInit_f(unsigned int flags);
	CUresult cuDeviceGet_f(CUdevice *dev, int ordinal);
	CUresult cuDeviceGetCount_f(int* count);
	CUresult cuDeviceGetName_f(char* name, int len, CUdevice dev);
	CUresult cuDeviceGetAttribute_f(int* value, CUdevice_attribute attrib, CUdevice dev);
	CUresult cuDevicePrimaryCtxRetain_f(CUcontext* ctx, CUdevice dev);
	CUresult cuDevicePrimaryCtxRelease_f(CUdevice dev);
	CUresult cuDeviceCanAccessPeer_f(int* can_access, CUdevice dev, CUdevice peer_dev);
	CUresult cuCtxGetCurrent_f(CUcontext* ctx);
	CUresult cuCtxSetCurrent_f(CUcontext ctx);
	CUresult cuCtxPushCurrent_f(CUcontext ctx);
	CUresult cuCtxPopCurrent_f(CUcontext* ctx);
	CUresult cuCtxSynchronize_f();
	CUresult cuCtxGetDevice_f(CUdevice* dev);
	CUresult cuCtxCreate_f(CUcontext* ctx, unsigned int flags, CUdevice dev);
	CUresult cuCtxDestroy_f(CUcontext ctx);
	CUresult cuCtxEnablePeerAccess_f(CUcontext peer_ctx, unsigned int flags);
	CUresult cuStreamCreate_f(CUstream* stream, unsigned int flags);
	CUresult cuStreamDestroy_f(CUstream stream);
	CUresult cuStreamSynchronize_f(CUstream stream);
	CUresult cuStreamWaitEvent_f(CUstream stream, CUevent event, unsigned int flags);
	CUresult cuEventCreate_f(CUevent* event, unsigned int flags);
	CUresult cuEventDestroy_f(CUevent event);
	CUresult cuEventRecord_f(CUevent event, CUstream stream);
	CUresult cuModuleUnload_f(CUmodule hmod);
	// `module` is an output handle; `image` points at the module data; `options` / `optionValues`
	// are parallel arrays of `numOptions` entries (as in cuModuleLoadDataEx).
	CUresult cuModuleLoadDataEx_f(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
	// `hfunc` is an output handle; `name` is a C string naming the function to look up in `hmod`.
	CUresult cuModuleGetFunction_f(CUfunction *hfunc, CUmodule hmod, const char *name);
	// `kernelParams` and `extra` are arrays of pointers (as in cuLaunchKernel).
	CUresult cuLaunchKernel_f(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
	CUresult cuMemcpyPeerAsync_f(CUdeviceptr dst_ptr, CUcontext dst_ctx, CUdeviceptr src_ptr, CUcontext src_ctx, size_t n, CUstream stream);
	CUresult cuGraphicsMapResources_f(unsigned int count, CUgraphicsResource* resources, CUstream stream);
	CUresult cuGraphicsUnmapResources_f(unsigned int count, CUgraphicsResource* resources, CUstream hStream);
	CUresult cuGraphicsResourceGetMappedPointer_f(CUdeviceptr* pDevPtr, size_t* pSize, CUgraphicsResource resource);
	CUresult cuGraphicsGLRegisterBuffer_f(CUgraphicsResource *pCudaResource, unsigned int buffer, unsigned int flags);
	CUresult cuGraphicsUnregisterResource_f(CUgraphicsResource resource);


	bool init_cuda_driver();
	bool is_cuda_driver_initialized();

	bool check_cuda_result(cudaError_t code, const char* file, int line);
	inline bool check_cuda_result(uint64_t code, const char* file, int line)
	{
	return check_cuda_result(static_cast<cudaError_t>(code), file, line);
	}

	bool check_cu_result(CUresult result, const char* file, int line);


	//
	// Scoped CUDA context guard
	//
	// Behaviour on entry
	// - If the given `context` is NULL, do nothing.
	// - If the given `context` is the same as the current context, do nothing.
	// - If the given `context` is different from the current context, make the given context current.
	//
	// Behaviour on exit
	// - If the current context did not change on entry, do nothing.
	// - If the `restore` flag was true on entry, make the previous context current.
	//
	// Default exit behaviour policy
	// - If the `restore` flag is omitted on entry, fall back on the global `always_restore` flag.
	// - This allows us to easily change the default behaviour of the guards.
	//
	// Failure handling
	// - If querying or setting the current context fails (as reported by check_cu), the guard
	//   records nothing to restore and its destructor does nothing.
	//
	// Usage note
	// - Each guard restores the previous context at most once in its own destructor; a copy of a
	//   guard would repeat that restore, so guards are meant to be used as plain scoped locals.
	//
	class ContextGuard
	{
	public:
	    // default policy for restoring contexts
	    static bool always_restore;

	    // Makes `context` current if it is non-NULL and differs from the current context.
	    // `restore` selects whether the previous context is reinstated on destruction.
	    explicit ContextGuard(CUcontext context, bool restore=always_restore)
	        : prev_context(nullptr), need_restore(false)
	    {
	        if (context)
	        {
	            // only switch when the query succeeded and the requested context is not already current
	            if (check_cu(cuCtxGetCurrent_f(&prev_context)) && context != prev_context)
	            {
	                // restore later only if the switch itself succeeded and the caller asked for it
	                need_restore = check_cu(cuCtxSetCurrent_f(context)) && restore;
	            }
	        }
	    }

	    // Overload for contexts carried as untyped pointers; forwards to the CUcontext constructor.
	    explicit ContextGuard(void* context, bool restore=always_restore)
	        : ContextGuard(static_cast<CUcontext>(context), restore)
	    {
	    }

	    // Reinstates the context that was current on entry, if a restore was recorded.
	    ~ContextGuard()
	    {
	        if (need_restore)
	        {
	            check_cu(cuCtxSetCurrent_f(prev_context));
	        }
	    }

	private:
	    // context that was current when the guard was constructed (nullptr until queried)
	    CUcontext prev_context;
	    // true only when the context was switched on entry and must be switched back on exit
	    bool need_restore;
	};

	#else

	// Stand-in handle types used when CUDA support is compiled out, so that
	// signatures mentioning them still compile.
	struct CUctx_st;
	struct CUstream_st;
	typedef int CUdevice;
	typedef CUctx_st* CUcontext;
	typedef CUstream_st* CUstream;

	// No-op counterpart of the CUDA context guard: accepts the same constructor
	// arguments and ignores them.
	class ContextGuard
	{
	public:
	    explicit ContextGuard(CUcontext context, bool restore=false)
	    {
	        // arguments are intentionally unused
	        static_cast<void>(context);
	        static_cast<void>(restore);
	    }

	    explicit ContextGuard(void* context, bool restore=false)
	    {
	        // arguments are intentionally unused
	        static_cast<void>(context);
	        static_cast<void>(restore);
	    }
	};

	#endif // WP_ENABLE_CUDA

	// Sentinel for the `context` parameter of device functions, meaning "run in whatever context is current".
	// It works in conjunction with ContextGuard, which does nothing when handed a NULL context,
	// so passing this value bypasses unnecessary context management.
	// Prefer this named constant over a bare NULL: it reads better and makes the intent clear.
	constexpr void* WP_CURRENT_CONTEXT = nullptr;