| |
|
| |
|
| |
|
| |
|
| | from caffe2.python import workspace, core, lstm_benchmark, utils |
| | from copy import copy |
| |
|
@utils.debug
def Compare(args, num_iters=1000):
    """Benchmark Caffe2's own LSTM implementation against cuDNN's.

    Sweeps a grid of (batch_size, seq_length, hidden_dim) configurations
    on GPU device 0, runs ``lstm_benchmark.Benchmark`` once with the
    'own' implementation and once with 'cudnn' for each configuration,
    then prints per-configuration timings and the average cudnn/own
    time ratio.

    Args:
        args: parsed namespace from ``lstm_benchmark.GetArgumentParser()``.
            Mutated in place for each configuration; a shallow copy is
            stored alongside each result so the final report reflects the
            configuration that produced it.
        num_iters: iterations per configuration (default 1000, the
            previously hard-coded value); also scales ``args.data_size``
            and sets ``args.iters_to_report``.
    """
    results = []
    args.gpu = True
    with core.DeviceScope(core.DeviceOption(workspace.GpuDeviceType, 0)):
        for batch_size in [64, 128, 256]:
            for seq_length in [20, 100]:
                for hidden_dim in [40, 100, 400, 800]:
                    args.batch_size = batch_size
                    args.seq_length = seq_length
                    args.hidden_dim = hidden_dim
                    args.data_size = batch_size * seq_length * num_iters
                    args.iters_to_report = num_iters // 3

                    args.implementation = 'own'
                    t_own = lstm_benchmark.Benchmark(args)
                    # Reset between runs so the two implementations don't
                    # share (or leak) workspace blobs.
                    workspace.ResetWorkspace()
                    args.implementation = 'cudnn'
                    t_cudnn = lstm_benchmark.Benchmark(args)
                    workspace.ResetWorkspace()
                    # Shallow-copy args: it is mutated on the next loop
                    # iteration, but the snapshot keeps this config.
                    results.append((copy(args), float(t_own), float(t_cudnn)))
                    print(args)
                    print("t_cudnn / t_own: {}".format(t_cudnn / t_own))

    for args, t_own, t_cudnn in results:
        print("{}: cudnn time: {}, own time: {}, ratio: {}".format(
            str(args), t_cudnn, t_own, t_cudnn / t_own))

    ratio_sum = 0
    for args, t_own, t_cudnn in results:
        ratio = float(t_cudnn) / t_own
        ratio_sum += ratio
        # args.num_layers comes from the benchmark's argument parser
        # defaults; it is never varied by the sweep above.
        print("hidden_dim: {}, seq_lengths: {}, batch_size: {}, num_layers: {}:"
              " cudnn time: {}, own time: {}, ratio: {}".format(
                  args.hidden_dim, args.seq_length, args.batch_size,
                  args.num_layers, t_cudnn, t_own, ratio))

    print("Ratio average: {}".format(ratio_sum / len(results)))
| |
|
| |
|
if __name__ == '__main__':
    # Parse the shared lstm_benchmark CLI flags, bring up Caffe2 with
    # quiet logging and GPU memory tracking enabled, then run the sweep.
    parsed_args = lstm_benchmark.GetArgumentParser().parse_args()

    init_flags = [
        'caffe2',
        '--caffe2_log_level=0',
        '--caffe2_print_blob_sizes_at_exit=0',
        '--caffe2_gpu_memory_tracking=1',
    ]
    workspace.GlobalInit(init_flags)

    Compare(parsed_args)
| |
|