| #pragma once
|
|
|
| #include <ATen/ATen.h>
|
|
|
| |
| |
|
|
|
|
| #ifdef __CUDACC__
|
|
|
|
|
// Function qualifiers for the branch compiled by a CUDA compiler (__CUDACC__
// defined): functions marked with these macros are built for host and device.
#define HOST_DEVICE __host__ __device__
#define INLINE_HOST_DEVICE HOST_DEVICE inline

// Floor wrapper for the CUDA branch; expands to an unqualified floor() call.
#define FLOOR(x) floor(x)
|
|
|
| #if __CUDA_ARCH__ >= 600
|
|
|
// Accumulate y into x atomically when __CUDA_ARCH__ >= 600, using the
// block-scoped atomicAdd_block on the address of x.
// NOTE(review): the other device branch of this conditional goes through
// atomicAdd instead of a *_block variant -- confirm that callers never
// accumulate into the same location from more than one thread block.
#define ACCUM(x, y) atomicAdd_block(&(x), (y))
|
| #else
|
|
|
|
|
// Generic atomic accumulate: forwards to the built-in atomicAdd overload for
// data_t and returns that overload's result.
template <typename data_t>
__device__ inline data_t atomic_add(data_t* address, data_t val) {
  return atomicAdd(address, val);
}

// double specialization, built only on the branch where __CUDA_ARCH__ >= 600
// does not hold. The addition is emulated with a compare-and-swap loop over
// the 64-bit pattern stored at `address`.
// Returns the value that was stored at `address` before this call's addition.
template <>
__device__ inline double atomic_add(double* address, double val) {
  unsigned long long int* const word =
      reinterpret_cast<unsigned long long int*>(address);
  unsigned long long int seen = *word;
  unsigned long long int expected;
  do {
    // Build the sum from the last observed bits, then publish it only if the
    // word still holds those bits; otherwise retry with what atomicCAS saw.
    expected = seen;
    const double updated = val + __longlong_as_double(expected);
    seen = atomicCAS(word, expected, __double_as_longlong(updated));
    // The exit test compares integer bit patterns, not doubles, so a NaN
    // stored at `address` cannot keep the loop spinning (NaN != NaN).
  } while (expected != seen);
  return __longlong_as_double(seen);
}
|
|
|
// Accumulate y into x atomically on the branch taken when __CUDA_ARCH__ >= 600
// does not hold: route through the atomic_add helper declared earlier in this
// branch, passing the address of x.
#define ACCUM(x, y) atomic_add(&(x), (y))
|
| #endif
|
|
|
| #else
|
|
|
|
|
// Host-only build (__CUDACC__ not defined): there are no CUDA qualifiers, so
// HOST_DEVICE expands to nothing and INLINE_HOST_DEVICE to plain `inline`.
#define HOST_DEVICE
#define INLINE_HOST_DEVICE inline

// Floor via the C++ standard library.
#define FLOOR(x) std::floor(x)

// Plain, non-atomic accumulation of y into x.
// The replacement list is fully parenthesized so the macro behaves as a single
// expression wherever it appears; without the outer parentheses,
// `2 * ACCUM(a, b)` would parse as `(2 * a) += b`.
// NOTE: the value of this expression is the updated x, whereas CUDA's
// atomicAdd family returns the previous value -- portable callers should use
// ACCUM only for its side effect.
#define ACCUM(x,y) ((x) += (y))
|
|
|
| #endif |