| | #pragma once |
| |
|
| | #include <ATen/ATen.h> |
| |
|
| | |
| | |
| | |
| |
|
| | #ifdef __CUDACC__ |
| | |
| |
|
// Portability macros for the branch taken when __CUDACC__ is defined.
// FLOOR forwards its argument to the unqualified floor() overload set.
#define FLOOR(value) floor(value)
// HOST_DEVICE marks a function as compiled for both host and device.
#define HOST_DEVICE __host__ __device__
// INLINE_HOST_DEVICE carries the same qualifiers as HOST_DEVICE plus `inline`.
#define INLINE_HOST_DEVICE __host__ __device__ inline
| |
|
| | #if __CUDA_ARCH__ >= 600 |
| | |
// ACCUM(target, delta): adds `delta` to the lvalue `target` through
// atomicAdd_block; this definition is the one chosen when __CUDA_ARCH__ >= 600.
// NOTE(review): the CUDA documentation describes the *_block atomics as atomic
// only with respect to threads of the same thread block, whereas the pre-sm_60
// fallback uses plain atomicAdd — confirm that no two blocks ever accumulate
// into the same address.
#define ACCUM(target, delta) atomicAdd_block(&(target), (delta))
| | #else |
| | |
| | |
| | template<typename data_t> |
| | __device__ inline data_t atomic_add(data_t *address, data_t val) { |
| | return atomicAdd(address, val); |
| | } |
| |
|
| | template<> |
| | __device__ inline double atomic_add(double *address, double val) { |
| | unsigned long long int* address_as_ull = (unsigned long long int*)address; |
| | unsigned long long int old = *address_as_ull, assumed; |
| | do { |
| | assumed = old; |
| | old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); |
| | } while (assumed != old); |
| | return __longlong_as_double(old); |
| | } |
| |
|
// ACCUM(target, delta): adds `delta` to the lvalue `target` through the
// atomic_add overloads declared earlier in this branch (the path used when
// __CUDA_ARCH__ is below 600 or undefined).
#define ACCUM(target, delta) atomic_add(&(target), (delta))
| | #endif |
| |
|
| | #else |
| | |
| |
|
// Host-only build (no __CUDACC__): the CUDA qualifiers expand to nothing and
// accumulation is a plain, non-atomic compound assignment.
#define HOST_DEVICE
#define INLINE_HOST_DEVICE inline
#define FLOOR(x) std::floor(x)
// The whole replacement list is parenthesised so the expansion stays a single
// expression when embedded in a larger one: without the outer parentheses
// `ACCUM(a, b) * 2` would parse as `a += (b * 2)`.
#define ACCUM(x,y) ((x) += (y))
| |
|
| | #endif |