#ifndef CAFFE_UTIL_MATH_FUNCTIONS_H_
#define CAFFE_UTIL_MATH_FUNCTIONS_H_

#include <stdint.h>
#include <cmath>    // std::fabs, std::signbit
#include <cstring>  // std::memset

#include "glog/logging.h"

#include "caffe/common.hpp"
#include "caffe/util/device_alternate.hpp"
#include "caffe/util/mkl_alternate.hpp"

namespace caffe {

| | |
| | |
| | template <typename Dtype> |
| | void caffe_cpu_gemm(const CBLAS_TRANSPOSE TransA, |
| | const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, |
| | const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta, |
| | Dtype* C); |
| |
|
| | template <typename Dtype> |
| | void caffe_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N, |
| | const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta, |
| | Dtype* y); |
| |
|
// Scaled vector accumulation (declaration only; defined elsewhere).
// NOTE(review): by BLAS naming this is presumably Y = alpha * X + Y over N
// elements -- confirm against the implementation.
template <typename Dtype>
void caffe_axpy(const int N,
                const Dtype alpha,
                const Dtype* X,
                Dtype* Y);

// Scaled vector combination (declaration only; defined elsewhere).
// NOTE(review): presumably Y = alpha * X + beta * Y over N elements --
// confirm against the implementation.
template <typename Dtype>
void caffe_cpu_axpby(const int N,
                     const Dtype alpha,
                     const Dtype* X,
                     const Dtype beta,
                     Dtype* Y);

// Copies N elements from X to Y (declaration only; defined elsewhere).
// NOTE(review): whether X and Y may overlap, or live on a device, is not
// visible from this header -- confirm against the implementation.
template <typename Dtype>
void caffe_copy(const int N, const Dtype* X, Dtype* Y);

// Fills the N-element buffer X with the value alpha (declaration only).
// NOTE(review): semantics inferred from the name and parameters -- confirm.
template <typename Dtype>
void caffe_set(const int N, const Dtype alpha, Dtype* X);

// Sets the first N bytes of the buffer X to the byte value `alpha`
// (converted to unsigned char, as std::memset does).
//
//   N     - number of BYTES to write (not elements).
//   alpha - fill value; only its low 8 bits are used.
//   X     - destination buffer; must be valid for N bytes when N > 0.
//
// A zero-length request returns immediately so that an empty (possibly null)
// buffer is never handed to std::memset, which would be undefined behavior.
inline void caffe_memset(const size_t N, const int alpha, void* X) {
  if (N == 0) {
    return;
  }
  std::memset(X, alpha, N);
}

// In-place scalar addition on an N-element buffer (declaration only).
// NOTE(review): presumably X[i] += alpha -- confirm against the
// implementation.
template <typename Dtype>
void caffe_add_scalar(const int N, const Dtype alpha, Dtype* X);

// In-place scaling of an N-element buffer (declaration only).
// NOTE(review): by BLAS naming presumably X[i] *= alpha -- confirm.
template <typename Dtype>
void caffe_scal(const int N, const Dtype alpha, Dtype* X);

// Element-wise vector routines (declarations only; defined elsewhere).
// Each takes an element count, one or two input buffers and an output
// buffer y.
// NOTE(review): the per-element formulas below are inferred from the function
// names; whether y may alias an input is not visible here -- confirm against
// the implementation.

// Presumably y[i] = a[i] * a[i].
template <typename Dtype>
void caffe_sqr(const int N, const Dtype* a, Dtype* y);

// Presumably y[i] = sqrt(a[i]).
template <typename Dtype>
void caffe_sqrt(const int N, const Dtype* a, Dtype* y);

// Presumably y[i] = a[i] + b[i].
template <typename Dtype>
void caffe_add(const int N, const Dtype* a, const Dtype* b, Dtype* y);

// Presumably y[i] = a[i] - b[i].
template <typename Dtype>
void caffe_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y);

// Presumably y[i] = a[i] * b[i].
template <typename Dtype>
void caffe_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y);

// Presumably y[i] = a[i] / b[i].
template <typename Dtype>
void caffe_div(const int N, const Dtype* a, const Dtype* b, Dtype* y);

// Presumably y[i] = a[i] raised to the scalar power b.
template <typename Dtype>
void caffe_powx(const int n, const Dtype* a, const Dtype b, Dtype* y);

// CPU random-number helpers (declarations only; defined elsewhere).
// NOTE(review): the distributions and interval conventions described below
// are inferred from the names and parameters -- confirm against the
// implementation.

// Returns an unsigned random value.
unsigned int caffe_rng_rand();

// Presumably returns the next representable value after b.
template <typename Dtype>
Dtype caffe_nextafter(const Dtype b);

// Presumably fills r[0..n) with uniform samples drawn between a and b.
template <typename Dtype>
void caffe_rng_uniform(const int n,
                       const Dtype a,
                       const Dtype b,
                       Dtype* r);

// Presumably fills r[0..n) with Gaussian samples (mean mu, std-dev sigma).
template <typename Dtype>
void caffe_rng_gaussian(const int n,
                        const Dtype mu,
                        const Dtype sigma,
                        Dtype* r);

// Presumably fills r[0..n) with Bernoulli(p) draws; signed-int output.
template <typename Dtype>
void caffe_rng_bernoulli(const int n, const Dtype p, int* r);

// Same as above, writing to an unsigned-int output buffer.
template <typename Dtype>
void caffe_rng_bernoulli(const int n, const Dtype p, unsigned int* r);

// Element-wise unary math on n-element buffers (declarations only).
// NOTE(review): formulas inferred from the names -- confirm against the
// implementation.

// Presumably y[i] = exp(a[i]).
template <typename Dtype>
void caffe_exp(const int n, const Dtype* a, Dtype* y);

// Presumably y[i] = log(a[i]).
template <typename Dtype>
void caffe_log(const int n, const Dtype* a, Dtype* y);

// Presumably y[i] = |a[i]|.
template <typename Dtype>
void caffe_abs(const int n, const Dtype* a, Dtype* y);

// CPU reductions returning a scalar (declarations only; defined elsewhere).
// NOTE(review): semantics inferred from BLAS-style names -- confirm against
// the implementation.

// Presumably the dot product of the n-element vectors x and y.
template <typename Dtype>
Dtype caffe_cpu_dot(const int n, const Dtype* x, const Dtype* y);

// Presumably a dot product that steps through x by incx and y by incy.
template <typename Dtype>
Dtype caffe_cpu_strided_dot(const int n,
                            const Dtype* x,
                            const int incx,
                            const Dtype* y,
                            const int incy);

// Presumably the sum of absolute values of the n elements of x.
template <typename Dtype>
Dtype caffe_cpu_asum(const int n, const Dtype* x);

// Returns the sign of `val` as an 8-bit integer: 1 when val is greater than
// zero, -1 when it is less than zero, and 0 otherwise. A value that compares
// neither above nor below zero (e.g. NaN) therefore also yields 0.
// Only operator< against Dtype(0) is required of Dtype.
template <typename Dtype>
inline int8_t caffe_sign(Dtype val) {
  const Dtype zero(0);
  const int above_zero = (zero < val) ? 1 : 0;
  const int below_zero = (val < zero) ? 1 : 0;
  return static_cast<int8_t>(above_zero - below_zero);
}

// Generates a function template
//
//   template <typename Dtype>
//   void caffe_cpu_<name>(const int n, const Dtype* x, Dtype* y);
//
// whose body first asserts (via the glog CHECK macros) that n > 0 and that
// both x and y are non-null, and then runs `operation` once for every index
// i in [0, n).
//
// `operation` is pasted verbatim into the loop body, so it may refer to the
// loop index `i`, to `n`, `x`, `y`, and to the template parameter `Dtype`.
// Typical form: y[i] = f(x[i]).
#define DEFINE_CAFFE_CPU_UNARY_FUNC(name, operation)              \
  template <typename Dtype>                                       \
  void caffe_cpu_##name(const int n, const Dtype* x, Dtype* y) {  \
    CHECK_GT(n, 0);                                               \
    CHECK(x);                                                     \
    CHECK(y);                                                     \
    for (int i = 0; i < n; ++i) {                                 \
      operation;                                                  \
    }                                                             \
  }

| | |
| | DEFINE_CAFFE_CPU_UNARY_FUNC(sign, y[i] = caffe_sign<Dtype>(x[i])) |
| |
|
| | |
| | |
| | |
| | |
| | DEFINE_CAFFE_CPU_UNARY_FUNC(sgnbit, \ |
| | y[i] = static_cast<bool>((std::signbit)(x[i]))) |
| |
|
| | DEFINE_CAFFE_CPU_UNARY_FUNC(fabs, y[i] = std::fabs(x[i])) |
| |
|
// Scaled copy on the CPU (declaration only; defined elsewhere).
// NOTE(review): presumably y[i] = alpha * x[i] for i in [0, n) -- confirm
// against the implementation.
template <typename Dtype>
void caffe_cpu_scale(const int n,
                     const Dtype alpha,
                     const Dtype* x,
                     Dtype* y);

#ifndef CPU_ONLY  // everything up to the matching #endif is GPU-only

| | |
| | |
| | |
| | template <typename Dtype> |
| | void caffe_gpu_gemm(const CBLAS_TRANSPOSE TransA, |
| | const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, |
| | const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta, |
| | Dtype* C); |
| |
|
| | template <typename Dtype> |
| | void caffe_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N, |
| | const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta, |
| | Dtype* y); |
| |
|
// GPU scaled vector accumulation (declaration only; defined elsewhere).
// NOTE(review): presumably Y = alpha * X + Y over N elements -- confirm.
template <typename Dtype>
void caffe_gpu_axpy(const int N,
                    const Dtype alpha,
                    const Dtype* X,
                    Dtype* Y);

// GPU scaled vector combination (declaration only; defined elsewhere).
// NOTE(review): presumably Y = alpha * X + beta * Y over N elements --
// confirm.
template <typename Dtype>
void caffe_gpu_axpby(const int N,
                     const Dtype alpha,
                     const Dtype* X,
                     const Dtype beta,
                     Dtype* Y);

// Byte-wise copy of N bytes from X to Y for GPU builds (declaration only).
// NOTE(review): which of X / Y may be host or device pointers is not visible
// from this header -- confirm against the implementation.
void caffe_gpu_memcpy(const size_t N, const void* X, void* Y);

// Fills the N-element buffer X with alpha (declaration only).
// NOTE(review): X is presumably a device pointer -- confirm.
template <typename Dtype>
void caffe_gpu_set(const int N, const Dtype alpha, Dtype* X);

| | inline void caffe_gpu_memset(const size_t N, const int alpha, void* X) { |
| | #ifndef CPU_ONLY |
| | CUDA_CHECK(cudaMemset(X, alpha, N)); |
| | #else |
| | NO_GPU; |
| | #endif |
| | } |
| |
|
| | template <typename Dtype> |
| | void caffe_gpu_add_scalar(const int N, const Dtype alpha, Dtype *X); |
| |
|
| | template <typename Dtype> |
| | void caffe_gpu_scal(const int N, const Dtype alpha, Dtype *X); |
| |
|
| | #ifndef CPU_ONLY |
| | template <typename Dtype> |
| | void caffe_gpu_scal(const int N, const Dtype alpha, Dtype* X, cudaStream_t str); |
| | #endif |
| |
|
// GPU element-wise vector routines (declarations only; defined elsewhere).
// NOTE(review): the per-element formulas are inferred from the names, and the
// buffers are presumably device pointers -- confirm against the
// implementation.

// Presumably y[i] = a[i] + b[i].
template <typename Dtype>
void caffe_gpu_add(const int N, const Dtype* a, const Dtype* b, Dtype* y);

// Presumably y[i] = a[i] - b[i].
template <typename Dtype>
void caffe_gpu_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y);

// Presumably y[i] = a[i] * b[i].
template <typename Dtype>
void caffe_gpu_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y);

// Presumably y[i] = a[i] / b[i].
template <typename Dtype>
void caffe_gpu_div(const int N, const Dtype* a, const Dtype* b, Dtype* y);

// Presumably y[i] = |a[i]|.
template <typename Dtype>
void caffe_gpu_abs(const int n, const Dtype* a, Dtype* y);

// Presumably y[i] = exp(a[i]).
template <typename Dtype>
void caffe_gpu_exp(const int n, const Dtype* a, Dtype* y);

// Presumably y[i] = log(a[i]).
template <typename Dtype>
void caffe_gpu_log(const int n, const Dtype* a, Dtype* y);

// Presumably y[i] = a[i] raised to the scalar power b.
template <typename Dtype>
void caffe_gpu_powx(const int n, const Dtype* a, const Dtype b, Dtype* y);

// Presumably y[i] = sqrt(a[i]).
template <typename Dtype>
void caffe_gpu_sqrt(const int n, const Dtype* a, Dtype* y);

// GPU random-number helpers (declarations only; defined elsewhere).
// NOTE(review): output ranges and interval open/closed conventions are not
// visible from this header -- confirm against the implementation.

// Two-argument form: presumably fills r[0..n) with random unsigned integers.
void caffe_gpu_rng_uniform(const int n, unsigned int* r);

// Four-argument form: presumably fills r[0..n) with uniform samples drawn
// between a and b.
template <typename Dtype>
void caffe_gpu_rng_uniform(const int n,
                           const Dtype a,
                           const Dtype b,
                           Dtype* r);

// Presumably fills r[0..n) with Gaussian samples (mean mu, std-dev sigma).
template <typename Dtype>
void caffe_gpu_rng_gaussian(const int n,
                            const Dtype mu,
                            const Dtype sigma,
                            Dtype* r);

// Presumably fills r[0..n) with Bernoulli(p) draws.
template <typename Dtype>
void caffe_gpu_rng_bernoulli(const int n, const Dtype p, int* r);

// GPU reductions (declarations only; defined elsewhere). Unlike the CPU
// variants, which return the value, these return void and take an output
// pointer.
// NOTE(review): whether the output pointer refers to host or device memory is
// not visible here -- confirm against the implementation.

// Presumably writes the dot product of x and y (n elements each) to *out.
template <typename Dtype>
void caffe_gpu_dot(const int n,
                   const Dtype* x,
                   const Dtype* y,
                   Dtype* out);

// Presumably writes the sum of absolute values of x[0..n) to *y.
template <typename Dtype>
void caffe_gpu_asum(const int n, const Dtype* x, Dtype* y);

// GPU element-wise routines matching the caffe_cpu_sign / caffe_cpu_sgnbit /
// caffe_cpu_fabs / caffe_cpu_scale signatures (declarations only).
// NOTE(review): presumably these compute the same per-element results as the
// CPU versions, on device buffers -- confirm against the implementation.

template <typename Dtype>
void caffe_gpu_sign(const int n, const Dtype* x, Dtype* y);

template <typename Dtype>
void caffe_gpu_sgnbit(const int n, const Dtype* x, Dtype* y);

template <typename Dtype>
void caffe_gpu_fabs(const int n, const Dtype* x, Dtype* y);

// Presumably y[i] = alpha * x[i].
template <typename Dtype>
void caffe_gpu_scale(const int n,
                     const Dtype alpha,
                     const Dtype* x,
                     Dtype* y);

// Generates, for a given `name`:
//   1. a __global__ kernel template `<name>_kernel(n, x, y)` whose body runs
//      `operation` inside CUDA_KERNEL_LOOP(index, n); and
//   2. explicit specializations of caffe_gpu_<name> for float and double that
//      launch that kernel with CAFFE_GET_BLOCKS(n) blocks of
//      CAFFE_CUDA_NUM_THREADS threads.
//
// `operation` is pasted verbatim into the kernel loop, so it may refer to
// `index`, `n`, `x`, `y` and `Dtype`. Typical form: y[index] = f(x[index]).
//
// The launch result is not checked inside the generated functions. The
// generated specializations are ordinary (non-inline) definitions, so expand
// this macro for a given name in one translation unit only.
#define DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(name, operation)              \
  template <typename Dtype>                                                 \
  __global__ void name##_kernel(const int n, const Dtype* x, Dtype* y) {    \
    CUDA_KERNEL_LOOP(index, n) {                                            \
      operation;                                                            \
    }                                                                       \
  }                                                                         \
  template <>                                                               \
  void caffe_gpu_##name<float>(const int n, const float* x, float* y) {     \
    name##_kernel<float>                                                    \
        <<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>(n, x, y);         \
  }                                                                         \
  template <>                                                               \
  void caffe_gpu_##name<double>(const int n, const double* x, double* y) {  \
    name##_kernel<double>                                                   \
        <<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>(n, x, y);         \
  }

#endif  // !CPU_ONLY

}  // namespace caffe

#endif  // CAFFE_UTIL_MATH_FUNCTIONS_H_