// Repository page metadata captured together with this file (not source code):
//   先坤
//   add greedrl
//   db26c81 almost 3 years ago
//   5.45 kB

	#pragma once
	#include <cassert>
	#include <cfloat>
	#include <chrono>
	#include <climits>
	#include <cstdarg>
	#include <cstdint>
	#include <cstdio>
	#include <cstdlib>
	#include <initializer_list>
	#include <limits>
	#include <map>
	#include <stdexcept>
	#include <string>
	#include <vector>

	#include <torch/extension.h>

	// Debug assertion; forwards to the standard assert() macro.
	#define ASSERT(c) assert(c)

	// Rounds v up to the next multiple of n using integer arithmetic.
	// Both arguments are parenthesised so that compound expressions such as
	// ALIGN(a + b, c + d) expand with the intended precedence. n is expanded
	// three times, so do not pass an expression with side effects.
	#define ALIGN(v, n) (((v) + (n) - 1) / (n) * (n))

	// Positive infinity as a float.
	#define INF std::numeric_limits<float>::infinity()

	// __FILE__ with its first SOURCE_PATH_LENGTH characters skipped.
	// SOURCE_PATH_LENGTH is not defined in this header; presumably the build
	// system supplies it as the length of the source-root prefix -- confirm.
	#define __FILENAME__ (__FILE__+ SOURCE_PATH_LENGTH)

	// Raises an error from the call site: hands the printf-style format and
	// its optional arguments to greedrl_error() together with __FILENAME__
	// and __LINE__. The expansion already ends with ';'.
	#define GRL_ERROR(format, ...) greedrl_error(__FILENAME__, __LINE__, format, ##__VA_ARGS__);


	// Checks a condition at the call site: hands `flag`, the printf-style
	// format and its optional arguments to greedrl_check() together with
	// __FILENAME__ and __LINE__. The expansion already ends with ';'.
	#define GRL_CHECK(flag, format, ...) greedrl_check(__FILENAME__, __LINE__, flag, format, ##__VA_ARGS__);


	// Allocates an uninitialised array of `size` elements of type T with
	// malloc() and assigns it to the existing pointer variable `ptr` (a T*).
	// A null result is reported through GRL_CHECK with "out of memory!".
	// The memory is not released by this macro; free() it when done.
	// The expansion is two statements and already ends with ';', so do not
	// use it as the unbraced body of an if/else or loop.
	#define MALLOC(ptr, T, size) \
	    ptr = static_cast<T*>(malloc(sizeof(T) * (size))); \
	    GRL_CHECK(ptr != nullptr, "out of memory!");


	// Declares a new local `T* const ptr` pointing at an uninitialised
	// malloc()ed array of `size` elements, after checking that `size` is
	// positive, and then declares an AllocGuard named `<ptr>_alloc_guard`
	// constructed from that pointer.
	// AllocGuard is not defined in this header; presumably it frees the
	// memory when it goes out of scope -- confirm against its definition.
	// The expansion is several statements/declarations and already ends with
	// ';', so it must be used at block scope, not as an unbraced if/else body.
	#define GALLOC(ptr, T, size) \
	    GRL_CHECK((size) > 0, "malloc 0 bytes"); \
	    T* const ptr = static_cast<T*>(malloc(sizeof(T) * (size))); \
	    GRL_CHECK(ptr != nullptr, "out of memory!"); \
	    AllocGuard ptr##_alloc_guard(ptr);


	// Resizes the array held in the existing pointer variable `ptr` to `size`
	// elements of type T with realloc(), after checking that `size` is
	// positive. A null result is reported through GRL_CHECK with
	// "out of memory!".
	// NOTE(review): `ptr` is overwritten with realloc()'s result before the
	// null check, so the previous block is no longer reachable through `ptr`
	// if the reallocation fails.
	// The expansion is several statements and already ends with ';'.
	#define REALLOC(ptr, T, size) \
	    GRL_CHECK((size) > 0, "malloc 0 bytes"); \
	    ptr = static_cast<T*>(realloc(ptr, sizeof(T) * (size))); \
	    GRL_CHECK(ptr != nullptr, "out of memory!");


	// Validates a tensor from the call site by calling greedrl_check_tensor()
	// with __FILENAME__, __LINE__, the tensor itself, its spelling in the
	// source (#tensor) as the name used in messages, the expected device, the
	// two allow_* flags, and the remaining arguments wrapped in a braced list
	// as the expected sizes. The expansion already ends with ';'.
	#define GRL_CHECK_TENSOR(tensor, device, allow_sub_contiguous, allow_null, ...) \
	    greedrl_check_tensor(__FILENAME__, __LINE__, \
	                         tensor, #tensor, device, \
	                         allow_sub_contiguous, allow_null, \
	                         {__VA_ARGS__});


	// Small integer tags numbered 0..3. Their consumers are outside this
	// header; presumably they label worker lifecycle events and task/finish
	// messages -- confirm against the code that uses them.
	constexpr int GRL_WORKER_START = 0;
	constexpr int GRL_WORKER_END   = 1;
	constexpr int GRL_TASK         = 2;
	constexpr int GRL_FINISH       = 3;

	// Upper limits used elsewhere in the project (not referenced in this
	// header). 48128 equals 47 * 1024; presumably a byte budget for CUDA
	// shared memory -- confirm against the kernels that use it.
	constexpr int MAX_BATCH_SIZE = 100000;
	constexpr int MAX_TASK_COUNT = 5120;
	constexpr int MAX_SHARED_MEM = 48128;

	// Short aliases for the torch and standard-library types used in this code.
	using Tensor = torch::Tensor;
	using Device = torch::Device;
	using String = std::string;
	// Ordered sequence of tensors.
	using TensorList = std::vector<Tensor>;
	// Tensors keyed by a string.
	using TensorMap = std::map<String, Tensor>;


	// Formats a printf-style message, appends " at <file>:<line>" and throws
	// the result as std::runtime_error. Never returns normally.
	//
	//   file, line  - call-site location appended to the message.
	//   format, ... - printf-style format string and its arguments.
	//
	// The message is built in a fixed 2048-byte buffer: longer text is
	// truncated, and the location suffix is only appended when the formatted
	// text left room for it.
	[[noreturn]] inline void greedrl_error(const char* const file, const int64_t line,
	                                       const char* const format, ...)
	{
	    // A per-call buffer (rather than a function-local static) keeps
	    // concurrent callers from overwriting each other's message;
	    // std::runtime_error copies the text before the buffer goes away.
	    constexpr int N = 2048;
	    char buf[N];

	    va_list args;
	    va_start(args, format);
	    int n = vsnprintf(buf, N, format, args);
	    va_end(args);

	    // vsnprintf returns a negative value on an encoding error; fall back to
	    // an empty message so the offset used below stays inside the buffer.
	    if(n < 0)
	    {
	        buf[0] = '\0';
	        n = 0;
	    }

	    if(n < N)
	    {
	        // int64_t is not `long` on every platform, so print it as long long.
	        snprintf(buf+n, N-n, " at %s:%lld", file, static_cast<long long>(line));
	    }

	    throw std::runtime_error(buf);
	}

	// Does nothing when `flag` is true. Otherwise formats a printf-style
	// message, appends " at <file>:<line>" and throws it as std::runtime_error.
	//
	//   file, line  - call-site location appended to the message.
	//   flag        - the condition that is expected to hold.
	//   format, ... - printf-style format string and its arguments.
	//
	// The message is built in a fixed 2048-byte buffer: longer text is
	// truncated, and the location suffix is only appended when the formatted
	// text left room for it.
	inline void greedrl_check(const char* const file, const int64_t line,
	                          const bool flag, const char* const format, ...)
	{
	    if(flag)
	    {
	        return;
	    }

	    // A per-call buffer (rather than a function-local static) keeps
	    // concurrent callers from overwriting each other's message;
	    // std::runtime_error copies the text before the buffer goes away.
	    constexpr int N = 2048;
	    char buf[N];

	    va_list args;
	    va_start(args, format);
	    int n = vsnprintf(buf, N, format, args);
	    va_end(args);

	    // vsnprintf returns a negative value on an encoding error; fall back to
	    // an empty message so the offset used below stays inside the buffer.
	    if(n < 0)
	    {
	        buf[0] = '\0';
	        n = 0;
	    }

	    if(n < N)
	    {
	        // int64_t is not `long` on every platform, so print it as long long.
	        snprintf(buf+n, N-n, " at %s:%lld", file, static_cast<long long>(line));
	    }

	    throw std::runtime_error(buf);
	}

	// Returns true when the tensor is contiguous except for its first
	// dimension: the last stride must be 1 and, for every dimension i with
	// 1 <= i <= dim-2, strides[i] must equal the product of the sizes of the
	// dimensions after it. The stride of dimension 0 is never inspected.
	inline bool is_sub_contiguous(const Tensor& tensor)
	{
	    const int64_t dim = tensor.dim();
	    // 0-d and 1-d tensors have no inner dimension to validate; returning
	    // here also keeps a 0-d tensor from indexing strides[-1] below.
	    if(dim <= 1) return true;

	    auto sizes = tensor.sizes();
	    auto strides = tensor.strides();

	    if(strides[dim-1] != 1) return false;

	    // Accumulate in 64 bits, the same width as the sizes and strides, so
	    // the running product cannot overflow a 32-bit int.
	    int64_t expected = 1;
	    for(int64_t i = dim-2; i > 0; i--)
	    {
	        expected *= sizes[i+1];
	        if(strides[i] != expected) return false;
	    }

	    return true;
	}

	// Validates a tensor argument and throws std::runtime_error (through
	// greedrl_check / greedrl_error, tagged with file:line) on the first
	// violated expectation.
	//
	//   file, line           - call-site location used in error messages.
	//   tensor               - the tensor to validate.
	//   name                 - how the tensor is called in error messages.
	//   device               - the device the tensor must live on.
	//   allow_sub_contiguous - when true, is_sub_contiguous() is used instead
	//                          of tensor.is_contiguous() for the layout check.
	//   allow_null           - when true, a tensor whose data_ptr() is null
	//                          skips the dimension and size checks.
	//   sizes                - expected size of every dimension, in order.
	//
	// Checks run in this order: element count below 10^9, device, layout,
	// (optional null short-circuit), number of dimensions, each size.
	inline void greedrl_check_tensor(const char* const file,
	                                 const int line,
	                                 const Tensor& tensor,
	                                 const String& name,
	                                 const Device& device,
	                                 bool allow_sub_contiguous,
	                                 bool allow_null,
	                                 std::initializer_list<int> sizes)
	{
	    greedrl_check(file, line, tensor.numel() < 1000 * 1000 * 1000, "tensor size too large");

	    // The device strings are only built on the failure path, so a passing
	    // check does not allocate.
	    const auto device2 = tensor.device();
	    if(!(device2 == device))
	    {
	        greedrl_error(file, line, "'%s' device is %s, but expect %s",
	                      name.c_str(), device2.str().c_str(), device.str().c_str());
	    }

	    const bool is_contiguous = allow_sub_contiguous ? is_sub_contiguous(tensor)
	                                                    : tensor.is_contiguous();
	    greedrl_check(file, line, is_contiguous, "'%s' is not contiguous", name.c_str());

	    if(allow_null && tensor.data_ptr() == nullptr) return;

	    // Compare as plain ints to avoid a signed/unsigned comparison.
	    const int actual_dim = static_cast<int>(tensor.dim());
	    const int expected_dim = static_cast<int>(sizes.size());
	    if(actual_dim != expected_dim)
	    {
	        greedrl_error(file, line, "'%s' dim is %d, but expect %d", name.c_str(), actual_dim, expected_dim);
	    }

	    int i = 0;
	    for(const int s : sizes)
	    {
	        const int64_t actual = tensor.size(i);
	        if(actual != s)
	        {
	            greedrl_error(file, line, "'%s' size(%d) is %d, but expect %d", name.c_str(), i, (int)actual, s);
	        }
	        i++;
	    }
	}


	#ifdef CUDA_FOUND

	#include <cuda_runtime_api.h>

	// Passes a CUDA status code to greedrl_check_cuda() together with
	// __FILENAME__ and __LINE__ of the call site. The expansion already ends
	// with ';'.
	#define GRL_CHECK_CUDA(error) greedrl_check_cuda(error, __FILENAME__, __LINE__);

	inline void greedrl_check_cuda(const cudaError_t& error,
	const char* file, const int64_t line)
	{
	if(error==cudaSuccess)
	{
	return;
	}

	const int N = 2048;
	static char buf[N];
	snprintf(buf, N, "%s, at %s:%ld", cudaGetErrorString(error), file, line);
	throw std::runtime_error(buf);
	}

	// Declaration only; the definition is not in this header.
	// Returns a mutable reference to a cudaDeviceProp selected by `i`
	// (presumably the CUDA device ordinal, with the properties stored by the
	// implementation -- confirm against the definition).
	cudaDeviceProp& cuda_get_device_prop(int i);

	#endif