| #pragma once |
| #include <cfloat> |
| #include <climits> |
| #include <cstdint> |
| #include <limits> |
| #include <chrono> |
| #include <stdexcept> |
| #include <torch/extension.h> |
|
|
// Thin wrapper over the standard assert() macro.
#define ASSERT(c) assert(c)
// Rounds v up to the next multiple of n using integer arithmetic
// (intended for non-negative v and positive n). Every use of an argument is
// parenthesized so compound expressions such as ALIGN(x, a + b) expand
// correctly. Note that both arguments are evaluated more than once.
#define ALIGN(v, n) (((v) + (n) - 1) / (n) * (n))
// Positive infinity as a float.
#define INF std::numeric_limits<float>::infinity()
// __FILE__ with its first SOURCE_PATH_LENGTH characters skipped.
// SOURCE_PATH_LENGTH is not defined in this header; presumably it is supplied
// by the build system -- confirm.
#define __FILENAME__ (__FILE__ + SOURCE_PATH_LENGTH)
|
|
// Unconditionally reports an error through greedrl_error(), tagging it with
// the current source file and line.
// Usage: GRL_ERROR("printf-style format", optional args...)
// The format string is passed as the first variadic argument so the macro
// only needs standard __VA_ARGS__ (no GNU named-variadic / ## extension).
// The trailing ';' is part of the expansion, as it always has been, so call
// sites written without their own semicolon keep compiling.
#define GRL_ERROR(...) \
    greedrl_error(__FILENAME__, __LINE__, __VA_ARGS__);
|
|
// Verifies `flag` through greedrl_check(), tagging a failure with the current
// source file and line.
// Usage: GRL_CHECK(condition, "printf-style format", optional args...)
// The format string travels inside __VA_ARGS__, so only standard variadic
// macro syntax is required (no GNU named-variadic / ## extension).
// The trailing ';' is part of the expansion, as it always has been, so call
// sites written without their own semicolon keep compiling.
#define GRL_CHECK(flag, ...) \
    greedrl_check(__FILENAME__, __LINE__, flag, __VA_ARGS__);
|
|
// Assigns to the existing pointer `ptr` a malloc'ed array of `size` elements
// of type T and raises through GRL_CHECK when malloc returns nullptr.
// The two statements are wrapped in do { } while(0) so the macro acts as a
// single statement under an unbraced if/for/while. The trailing ';' is kept
// so call sites that omit their own semicolon keep compiling.
// The caller owns the memory and must release it with free().
#define MALLOC(ptr, T, size) \
    do { \
        (ptr) = (T*) malloc(sizeof(T) * (size)); \
        GRL_CHECK((ptr) != nullptr, "out of memory!"); \
    } while(0);
|
|
// Declares, in the enclosing scope, `T* const ptr` pointing at a malloc'ed
// array of `size` elements, plus an AllocGuard named `<ptr>_alloc_guard`
// constructed from that pointer. Raises through GRL_CHECK when `size` is not
// positive or when malloc returns nullptr.
// AllocGuard is not defined in this header; presumably it frees the pointer
// when it goes out of scope -- confirm against its definition.
// Because the macro introduces declarations it cannot be wrapped in a block;
// use it only where a sequence of statements is allowed.
#define GALLOC(ptr, T, size) \
    GRL_CHECK((size) > 0, "malloc 0 bytes"); \
    T* const ptr = static_cast<T*>(malloc(sizeof(T) * (size))); \
    GRL_CHECK(ptr != nullptr, "out of memory!"); \
    AllocGuard ptr##_alloc_guard(ptr);
|
|
// Resizes the malloc'ed array behind `ptr` to `size` elements of type T.
// Raises through GRL_CHECK when `size` is not positive or realloc fails.
// realloc's result goes into a temporary first: on failure realloc leaves the
// old block untouched, so `ptr` must keep pointing at it for the owner to be
// able to free it once the error propagates.
// Wrapped in do { } while(0) so the macro is a single statement; the trailing
// ';' is kept so call sites that omit their own semicolon keep compiling.
#define REALLOC(ptr, T, size) \
    do { \
        GRL_CHECK((size) > 0, "malloc 0 bytes"); \
        T* const grl_realloc_result_ = (T*) realloc((ptr), sizeof(T) * (size)); \
        GRL_CHECK(grl_realloc_result_ != nullptr, "out of memory!"); \
        (ptr) = grl_realloc_result_; \
    } while(0);
|
|
// Validates a tensor through greedrl_check_tensor(), tagging failures with the
// current source file and line. The spelling of the first argument is
// stringified and used as the tensor's name in error messages. The trailing
// variadic arguments are the expected size of each dimension, forwarded as a
// braced initializer list.
// The expansion ends with ';'.
#define GRL_CHECK_TENSOR(tensor_expr, expected_device, sub_contiguous_ok, null_ok, ...) \
    greedrl_check_tensor(__FILENAME__, __LINE__, \
                         tensor_expr, #tensor_expr, expected_device, \
                         sub_contiguous_ok, null_ok, \
                         {__VA_ARGS__});
|
|
// Integer tags; judging by the names they label worker/task lifecycle
// events -- their consumers are outside this header.
constexpr int GRL_WORKER_START = 0;
constexpr int GRL_WORKER_END   = 1;
constexpr int GRL_TASK         = 2;
constexpr int GRL_FINISH       = 3;
|
|
// Upper bounds used elsewhere in the project. Units are not stated in this
// header (MAX_SHARED_MEM is presumably a byte count -- confirm).
constexpr int MAX_BATCH_SIZE = 100000;
constexpr int MAX_TASK_COUNT = 5120;
constexpr int MAX_SHARED_MEM = 48128;
|
|
| using String = std::string; |
| using Device = torch::Device; |
| using Tensor = torch::Tensor; |
| using TensorMap = std::map<String, Tensor>; |
| using TensorList = std::vector<Tensor>; |
|
|
|
|
/// Formats a printf-style message, appends " at <file>:<line>" and throws it
/// as std::runtime_error. Never returns normally.
///
/// @param file    source file name to report
/// @param line    source line number to report
/// @param format  printf-style format string followed by its arguments
/// @throws std::runtime_error always
///
/// The message is limited to 2047 characters; when the formatted text already
/// fills the buffer, the location suffix is dropped.
inline void greedrl_error(const char* const file, const int64_t line,
                          const char* const format, ...)
{
    constexpr int N = 2048;
    // Local (not static) buffer: std::runtime_error copies the text, and a
    // shared static buffer would race when two threads fail at the same time.
    char buf[N];

    va_list args;
    va_start(args, format);
    int n = vsnprintf(buf, N, format, args);
    va_end(args);

    // vsnprintf reports an encoding error with a negative value; treat that as
    // an empty message instead of indexing before the start of the buffer.
    if(n < 0)
    {
        n = 0;
        buf[0] = '\0';
    }

    if(n < N)
    {
        // int64_t is `long long` on some platforms, so print it as long long
        // rather than relying on %ld.
        snprintf(buf + n, N - n, " at %s:%lld", file, static_cast<long long>(line));
    }

    throw std::runtime_error(buf);
}
|
|
/// Does nothing when `flag` is true; otherwise formats a printf-style message,
/// appends " at <file>:<line>" and throws it as std::runtime_error.
///
/// @param file    source file name to report
/// @param line    source line number to report
/// @param flag    condition that must hold
/// @param format  printf-style format string followed by its arguments
/// @throws std::runtime_error when `flag` is false
///
/// The message is limited to 2047 characters; when the formatted text already
/// fills the buffer, the location suffix is dropped.
inline void greedrl_check(const char* const file, const int64_t line,
                          const bool flag, const char* const format, ...)
{
    if(flag)
    {
        return;
    }

    constexpr int N = 2048;
    // Local (not static) buffer: std::runtime_error copies the text, and a
    // shared static buffer would race when two threads fail at the same time.
    char buf[N];

    va_list args;
    va_start(args, format);
    int n = vsnprintf(buf, N, format, args);
    va_end(args);

    // vsnprintf reports an encoding error with a negative value; treat that as
    // an empty message instead of indexing before the start of the buffer.
    if(n < 0)
    {
        n = 0;
        buf[0] = '\0';
    }

    if(n < N)
    {
        // int64_t is `long long` on some platforms, so print it as long long
        // rather than relying on %ld.
        snprintf(buf + n, N - n, " at %s:%lld", file, static_cast<long long>(line));
    }

    throw std::runtime_error(buf);
}
|
|
| |
| inline bool is_sub_contiguous(const Tensor& tensor) |
| { |
| int dim = tensor.dim(); |
| if(dim==1) return true; |
|
|
| auto sizes = tensor.sizes(); |
| auto strides = tensor.strides(); |
| |
| if(strides[dim-1] != 1) return false; |
| |
| int s = 1; |
| for(int i=dim-2; i>0; i--) |
| { |
| s *= sizes[i+1]; |
| if(strides[i] != s) return false; |
| } |
| |
| return true; |
|
|
| }; |
|
|
| inline void greedrl_check_tensor(const char* const file, |
| const int line, |
| const Tensor& tensor, |
| const String& name, |
| const Device& device, |
| bool allow_sub_contiguous, |
| bool allow_null, |
| std::initializer_list<int> sizes) |
| { |
| greedrl_check(file, line, tensor.numel() < 1000 * 1000 * 1000, "tensor size too large"); |
|
|
| auto device2 = tensor.device(); |
| greedrl_check(file, line, device2==device, |
| "'%s' device is %s, but expect %s", |
| name.c_str(), device2.str().c_str(), device.str().c_str()); |
| |
| bool is_contiguous = allow_sub_contiguous ? is_sub_contiguous(tensor) : tensor.is_contiguous(); |
| greedrl_check(file, line, is_contiguous, "'%s' is not contiguous", name.c_str()); |
| |
| if(allow_null && tensor.data_ptr() == nullptr) return; |
| |
| if(tensor.dim() != sizes.size()) |
| { |
| greedrl_error(file, line, "'%s' dim is %d, but expect %d", name.c_str(), (int)tensor.dim(), (int)sizes.size()); |
| } |
| int i=0; |
| for(auto s:sizes) |
| { |
| greedrl_check(file, line, tensor.size(i)==s, "'%s' size(%d) is %d, but expect %d", name.c_str(), i, (int)tensor.size(i), s); |
| i++; |
| } |
| } |
|
|
|
|
| #ifdef CUDA_FOUND |
|
|
| #include <cuda_runtime_api.h> |
|
|
// Passes a cudaError_t (typically the result of a CUDA runtime call) to
// greedrl_check_cuda() together with the current source file and line.
// The expansion ends with ';'.
#define GRL_CHECK_CUDA(cuda_status) \
    greedrl_check_cuda(cuda_status, __FILENAME__, __LINE__);
|
|
| inline void greedrl_check_cuda(const cudaError_t& error, |
| const char* file, const int64_t line) |
| { |
| if(error==cudaSuccess) |
| { |
| return; |
| } |
| |
| const int N = 2048; |
| static char buf[N]; |
| snprintf(buf, N, "%s, at %s:%ld", cudaGetErrorString(error), file, line); |
| throw std::runtime_error(buf); |
| } |
|
|
| cudaDeviceProp& cuda_get_device_prop(int i); |
|
|
| #endif |
|
|