| | #ifndef CAFFE_UTIL_GPU_UTIL_H_ |
| | #define CAFFE_UTIL_GPU_UTIL_H_ |
| |
|
| | namespace caffe { |
| |
|
| | template <typename Dtype> |
| | inline __device__ Dtype caffe_gpu_atomic_add(const Dtype val, Dtype* address); |
| |
|
| | template <> |
| | inline __device__ |
| | float caffe_gpu_atomic_add(const float val, float* address) { |
| | return atomicAdd(address, val); |
| | } |
| |
|
| | |
| | |
| | template <> |
| | inline __device__ |
| | double caffe_gpu_atomic_add(const double val, double* address) { |
| | unsigned long long int* address_as_ull = |
| | |
| | reinterpret_cast<unsigned long long int*>(address); |
| | unsigned long long int old = *address_as_ull; |
| | unsigned long long int assumed; |
| | do { |
| | assumed = old; |
| | old = atomicCAS(address_as_ull, assumed, |
| | __double_as_longlong(val + __longlong_as_double(assumed))); |
| | } while (assumed != old); |
| | return __longlong_as_double(old); |
| | } |
| |
|
| | } |
| |
|
| | #endif |
| |
|