|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define _FLOAT |
|
|
|
|
|
#include <iostream> |
|
|
|
|
|
#include <Eigen/Core> |
|
|
#include "BenchTimer.h" |
|
|
|
|
|
|
|
|
extern "C" { |
|
|
#include <cblas.h> |
|
|
} |
|
|
#include <string> |
|
|
|
|
|
#ifdef _FLOAT |
|
|
typedef float Scalar; |
|
|
#define CBLAS_GEMM cblas_sgemm |
|
|
#else |
|
|
typedef double Scalar; |
|
|
#define CBLAS_GEMM cblas_dgemm |
|
|
#endif |
|
|
|
|
|
|
|
|
typedef Eigen::Matrix<Scalar,Eigen::Dynamic,Eigen::Dynamic> MyMatrix; |
|
|
void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops); |
|
|
void check_product(int M, int N, int K); |
|
|
void check_product(void); |
|
|
|
|
|
int main(int argc, char *argv[]) |
|
|
{ |
|
|
|
|
|
#ifdef __GNUC__ |
|
|
{ |
|
|
int aux; |
|
|
asm( |
|
|
"stmxcsr %[aux] \n\t" |
|
|
"orl $32832, %[aux] \n\t" |
|
|
"ldmxcsr %[aux] \n\t" |
|
|
: : [aux] "m" (aux)); |
|
|
} |
|
|
#endif |
|
|
|
|
|
int nbtries=1, nbloops=1, M, N, K; |
|
|
|
|
|
if (argc==2) |
|
|
{ |
|
|
if (std::string(argv[1])=="check") |
|
|
check_product(); |
|
|
else |
|
|
M = N = K = atoi(argv[1]); |
|
|
} |
|
|
else if ((argc==3) && (std::string(argv[1])=="auto")) |
|
|
{ |
|
|
M = N = K = atoi(argv[2]); |
|
|
nbloops = 1000000000/(M*M*M); |
|
|
if (nbloops<1) |
|
|
nbloops = 1; |
|
|
nbtries = 6; |
|
|
} |
|
|
else if (argc==4) |
|
|
{ |
|
|
M = N = K = atoi(argv[1]); |
|
|
nbloops = atoi(argv[2]); |
|
|
nbtries = atoi(argv[3]); |
|
|
} |
|
|
else if (argc==6) |
|
|
{ |
|
|
M = atoi(argv[1]); |
|
|
N = atoi(argv[2]); |
|
|
K = atoi(argv[3]); |
|
|
nbloops = atoi(argv[4]); |
|
|
nbtries = atoi(argv[5]); |
|
|
} |
|
|
else |
|
|
{ |
|
|
std::cout << "Usage: " << argv[0] << " size \n"; |
|
|
std::cout << "Usage: " << argv[0] << " auto size\n"; |
|
|
std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n"; |
|
|
std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n"; |
|
|
std::cout << "Usage: " << argv[0] << " check\n"; |
|
|
std::cout << "Options:\n"; |
|
|
std::cout << " size unique size of the 2 matrices (integer)\n"; |
|
|
std::cout << " auto automatically set the number of repetitions and tries\n"; |
|
|
std::cout << " nbloops number of times the GEMM routines is executed\n"; |
|
|
std::cout << " nbtries number of times the loop is benched (return the best try)\n"; |
|
|
std::cout << " M N K sizes of the matrices: MxN = MxK * KxN (integers)\n"; |
|
|
std::cout << " check check eigen product using cblas as a reference\n"; |
|
|
exit(1); |
|
|
} |
|
|
|
|
|
double nbmad = double(M) * double(N) * double(K) * double(nbloops); |
|
|
|
|
|
if (!(std::string(argv[1])=="auto")) |
|
|
std::cout << M << " x " << N << " x " << K << "\n"; |
|
|
|
|
|
Scalar alpha, beta; |
|
|
MyMatrix ma(M,K), mb(K,N), mc(M,N); |
|
|
ma = MyMatrix::Random(M,K); |
|
|
mb = MyMatrix::Random(K,N); |
|
|
mc = MyMatrix::Random(M,N); |
|
|
|
|
|
Eigen::BenchTimer timer; |
|
|
|
|
|
|
|
|
alpha = 1; |
|
|
beta = 1; |
|
|
|
|
|
|
|
|
|
|
|
if (!(std::string(argv[1])=="auto")) |
|
|
{ |
|
|
timer.reset(); |
|
|
for (uint k=0 ; k<nbtries ; ++k) |
|
|
{ |
|
|
timer.start(); |
|
|
for (uint j=0 ; j<nbloops ; ++j) |
|
|
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR |
|
|
CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N); |
|
|
#else |
|
|
CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M); |
|
|
#endif |
|
|
timer.stop(); |
|
|
} |
|
|
if (!(std::string(argv[1])=="auto")) |
|
|
std::cout << "cblas: " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n"; |
|
|
else |
|
|
std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n"; |
|
|
} |
|
|
|
|
|
|
|
|
ma = MyMatrix::Random(M,K); |
|
|
mb = MyMatrix::Random(K,N); |
|
|
mc = MyMatrix::Random(M,N); |
|
|
|
|
|
|
|
|
|
|
|
{ |
|
|
timer.reset(); |
|
|
for (uint k=0 ; k<nbtries ; ++k) |
|
|
{ |
|
|
timer.start(); |
|
|
bench_eigengemm(mc, ma, mb, nbloops); |
|
|
timer.stop(); |
|
|
} |
|
|
if (!(std::string(argv[1])=="auto")) |
|
|
std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n"; |
|
|
else |
|
|
std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n"; |
|
|
} |
|
|
|
|
|
std::cout << "l1: " << Eigen::l1CacheSize() << std::endl; |
|
|
std::cout << "l2: " << Eigen::l2CacheSize() << std::endl; |
|
|
|
|
|
|
|
|
return 0; |
|
|
} |
|
|
|
|
|
using namespace Eigen; |
|
|
|
|
|
void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops) |
|
|
{ |
|
|
for (uint j=0 ; j<nbloops ; ++j) |
|
|
mc.noalias() += ma * mb; |
|
|
} |
|
|
|
|
|
#define MYVERIFY(A,M) if (!(A)) { \ |
|
|
std::cout << "FAIL: " << M << "\n"; \ |
|
|
} |
|
|
void check_product(int M, int N, int K) |
|
|
{ |
|
|
MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N); |
|
|
ma = MyMatrix::Random(M,K); |
|
|
mb = MyMatrix::Random(K,N); |
|
|
maT = ma.transpose(); |
|
|
mbT = mb.transpose(); |
|
|
mc = MyMatrix::Random(M,N); |
|
|
|
|
|
MyMatrix::Scalar eps = 1e-4; |
|
|
|
|
|
meigen = mref = mc; |
|
|
CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M); |
|
|
meigen += ma * mb; |
|
|
MYVERIFY(meigen.isApprox(mref, eps),". * ."); |
|
|
|
|
|
meigen = mref = mc; |
|
|
CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M); |
|
|
meigen += maT.transpose() * mb; |
|
|
MYVERIFY(meigen.isApprox(mref, eps),"T * ."); |
|
|
|
|
|
meigen = mref = mc; |
|
|
CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M); |
|
|
meigen += (maT.transpose()) * (mbT.transpose()); |
|
|
MYVERIFY(meigen.isApprox(mref, eps),"T * T"); |
|
|
|
|
|
meigen = mref = mc; |
|
|
CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M); |
|
|
meigen += ma * mbT.transpose(); |
|
|
MYVERIFY(meigen.isApprox(mref, eps),". * T"); |
|
|
} |
|
|
|
|
|
void check_product(void) |
|
|
{ |
|
|
int M, N, K; |
|
|
for (uint i=0; i<1000; ++i) |
|
|
{ |
|
|
M = internal::random<int>(1,64); |
|
|
N = internal::random<int>(1,768); |
|
|
K = internal::random<int>(1,768); |
|
|
M = (0 + M) * 1; |
|
|
std::cout << M << " x " << N << " x " << K << "\n"; |
|
|
check_product(M, N, K); |
|
|
} |
|
|
} |
|
|
|
|
|
|