#define EIGEN_USE_THREADS

#include <string>

#include "tensor_benchmarks.h"

// Declares two objects in the scope where the macro is expanded:
//   * `pool`   - an Eigen::ThreadPool constructed from `threads`;
//   * `device` - an Eigen::ThreadPoolDevice constructed from `&pool` and the
//                same `threads` value.
//
// `pool` is declared first because `device` is constructed from its address.
// The names `pool` and `device` are fixed, so both must be unused in the
// expanding scope. The expansion already ends in a semicolon; writing
// `CREATE_THREAD_POOL(n);` therefore leaves a harmless empty statement.
#define CREATE_THREAD_POOL(threads) \
  Eigen::ThreadPool pool(threads);  \
  Eigen::ThreadPoolDevice device(&pool, threads);

// Defines and registers one contraction benchmark on a thread-pool device.
//
// BM_ContractionCPU(D1, D2, D3) expands to:
//   * a static function BM_Contraction_<D1>x<D2>x<D3>(int iters, int Threads)
//     that calls StopBenchmarkTiming(), creates `pool` and `device` through
//     CREATE_THREAD_POOL(Threads), builds a
//     BenchmarkSuite<Eigen::ThreadPoolDevice, float> from
//     (device, D1, D2, D3) and calls suite.contraction(iters);
//   * a BENCHMARK_RANGE registration of that function with the bounds 1, 32
//     (presumably the range swept for `Threads` -- confirm against the
//     BENCHMARK_RANGE definition).
//
// D1, D2 and D3 are token-pasted into the function name as well as passed to
// the suite, so each must be a single token that can form an identifier
// (e.g. an integer literal).
// NOTE(review): the meaning of each dimension is defined by BenchmarkSuite in
// tensor_benchmarks.h, not here; StopBenchmarkTiming() is presumably called
// first so that setup is not timed -- confirm.
//
// The expansion already ends in a semicolon, so `BM_ContractionCPU(...);`
// leaves a harmless empty declaration behind.
#define BM_ContractionCPU(D1, D2, D3)                                         \
  static void BM_Contraction_##D1##x##D2##x##D3(int iters, int Threads) {     \
    StopBenchmarkTiming();                                                    \
    CREATE_THREAD_POOL(Threads);                                              \
    BenchmarkSuite<Eigen::ThreadPoolDevice, float> suite(device, D1, D2, D3); \
    suite.contraction(iters);                                                 \
  }                                                                           \
  BENCHMARK_RANGE(BM_Contraction_##D1##x##D2##x##D3, 1, 32);

// Benchmark instantiations. Each line defines and registers one contraction
// benchmark through BM_ContractionCPU; the three arguments are passed on to
// the BenchmarkSuite constructor in the order given.

// One of the first two dimensions is 1 (presumably vector-matrix and
// matrix-vector products -- confirm the argument order in BenchmarkSuite).
BM_ContractionCPU(1, 2000, 500);
BM_ContractionCPU(2000, 1, 500);

// Second dimension of 3 with much larger first and third dimensions.
BM_ContractionCPU(250, 3, 512);
BM_ContractionCPU(1500, 3, 512);

// Mixed shapes, with the third dimension ranging from 4 up to 13522.
BM_ContractionCPU(512, 800, 4);
BM_ContractionCPU(512, 80, 800);
BM_ContractionCPU(512, 80, 13522);
BM_ContractionCPU(1, 80, 13522);

// First dimension fixed at 3200.
BM_ContractionCPU(3200, 512, 4);
BM_ContractionCPU(3200, 512, 80);
BM_ContractionCPU(3200, 80, 512);