#ifndef CAFFE_UTIL_DEVICE_ALTERNATE_H_ #define CAFFE_UTIL_DEVICE_ALTERNATE_H_ #ifdef CPU_ONLY // CPU-only Caffe. #include // Stub out GPU calls as unavailable. #define NO_GPU LOG(FATAL) << "Cannot use GPU in CPU-only Caffe: check mode." #define STUB_GPU(classname) \ template \ void classname::Forward_gpu(const vector*>& bottom, \ const vector*>& top) { NO_GPU; } \ template \ void classname::Backward_gpu(const vector*>& top, \ const vector& propagate_down, \ const vector*>& bottom) { NO_GPU; } \ #define STUB_GPU_FORWARD(classname, funcname) \ template \ void classname::funcname##_##gpu(const vector*>& bottom, \ const vector*>& top) { NO_GPU; } \ #define STUB_GPU_BACKWARD(classname, funcname) \ template \ void classname::funcname##_##gpu(const vector*>& top, \ const vector& propagate_down, \ const vector*>& bottom) { NO_GPU; } \ #else // Normal GPU + CPU Caffe. #include #include #include #include #include // cuda driver types #ifdef USE_CUDNN // cuDNN acceleration library. #include "caffe/util/cudnn.hpp" #endif // // CUDA macros // // CUDA: various checks for different function calls. #define CUDA_CHECK(condition) \ /* Code block avoids redefinition of cudaError_t error */ \ do { \ cudaError_t error = condition; \ CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \ } while (0) #define CUBLAS_CHECK(condition) \ do { \ cublasStatus_t status = condition; \ CHECK_EQ(status, CUBLAS_STATUS_SUCCESS) << " " \ << caffe::cublasGetErrorString(status); \ } while (0) #define CURAND_CHECK(condition) \ do { \ curandStatus_t status = condition; \ CHECK_EQ(status, CURAND_STATUS_SUCCESS) << " " \ << caffe::curandGetErrorString(status); \ } while (0) // CUDA: grid stride looping #define CUDA_KERNEL_LOOP(i, n) \ for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ i < (n); \ i += blockDim.x * gridDim.x) // CUDA: check for error after kernel execution and exit loudly if there is one. #define CUDA_POST_KERNEL_CHECK CUDA_CHECK(cudaPeekAtLastError()) namespace caffe { // CUDA: library error reporting. const char* cublasGetErrorString(cublasStatus_t error); const char* curandGetErrorString(curandStatus_t error); // CUDA: use 512 threads per block const int CAFFE_CUDA_NUM_THREADS = 512; // CUDA: number of blocks for threads. inline int CAFFE_GET_BLOCKS(const int N) { return (N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS; } } // namespace caffe #endif // CPU_ONLY #endif // CAFFE_UTIL_DEVICE_ALTERNATE_H_