| |
|
| |
|
| |
|
| |
|
| |
|
| | from caffe2.proto import caffe2_pb2 |
| | from caffe2.python import core, workspace |
| | from caffe2.python.models.download import ModelDownloader |
| | import numpy as np |
| | import argparse |
| | import time |
| |
|
| |
|
def GetArgumentParser():
    """Build the command-line parser for the Caffe2 benchmark script.

    Every option is declared in a single table and registered in order,
    so the generated help lists them in the same sequence as the table.

    Returns:
        argparse.ArgumentParser: A parser that knows all benchmark flags.
    """
    # (flag, keyword arguments forwarded verbatim to ``add_argument``)
    option_specs = (
        ("--batch_size",
         dict(type=int, default=128, help="The batch size.")),
        ("--model",
         dict(type=str, help="The model to benchmark.")),
        ("--order",
         dict(type=str, default="NCHW", help="The order to evaluate.")),
        ("--device",
         dict(type=str, default="CPU", help="device to evaluate on.")),
        ("--cudnn_ws",
         dict(type=int, help="The cudnn workspace size.")),
        ("--iterations",
         dict(type=int, default=10,
              help="Number of iterations to run the network.")),
        ("--warmup_iterations",
         dict(type=int, default=10,
              help="Number of warm-up iterations before benchmarking.")),
        ("--forward_only",
         dict(action='store_true',
              help="If set, only run the forward pass.")),
        ("--layer_wise_benchmark",
         dict(action='store_true',
              help="If True, run the layer-wise benchmark as well.")),
        ("--engine",
         dict(type=str, default="",
              help="If set, blindly prefer the given engine(s) for every op.")),
        ("--dump_model",
         dict(action='store_true',
              help="If True, dump the model prototxts to disk.")),
        ("--net_type", dict(type=str, default="simple")),
        ("--num_workers", dict(type=int, default=2)),
        ("--use-nvtx", dict(default=False, action='store_true')),
        ("--htrace_span_log_path", dict(type=str)),
    )

    cli = argparse.ArgumentParser(description="Caffe2 benchmark.")
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli
| |
|
| |
|
def benchmark(args):
    """Benchmark a Caffe2 model fetched by name and print its throughput.

    The model is obtained through ``ModelDownloader.get_c2_model``. For
    every entry in the returned value info a random float32 array is
    generated whose shape is the entry's last element with its first
    dimension replaced by ``args.batch_size``. The prediction net and each
    of its ops are pinned to the requested device, the init net is run once
    to materialise the weights, and ``workspace.BenchmarkNet`` is then
    invoked on the prediction net.

    Args:
        args: Parsed command-line arguments. This function reads
            ``batch_size``, ``model``, ``device``, ``warmup_iterations``,
            ``iterations`` and ``layer_wise_benchmark``.

    Raises:
        Exception: If ``args.device`` is not ``CPU``, ``MKL`` or ``IDEEP``.
    """
    print('Batch size: {}'.format(args.batch_size))
    mf = ModelDownloader()
    init_net, pred_net, value_info = mf.get_c2_model(args.model)

    # Replace the leading dimension of each declared input shape with the
    # requested batch size.
    input_shapes = {
        k: [args.batch_size] + v[-1][1:] for (k, v) in value_info.items()
    }
    print("input info: {}".format(input_shapes))
    external_inputs = {
        k: np.random.randn(*shape).astype(np.float32)
        for k, shape in input_shapes.items()
    }

    # Map the CLI device name to the caffe2_pb2 attribute holding the device
    # type. The attribute is resolved lazily so that only the selected
    # device type has to exist in the installed caffe2 build.
    device_type_names = {'CPU': 'CPU', 'MKL': 'MKLDNN', 'IDEEP': 'IDEEP'}
    if args.device not in device_type_names:
        raise Exception("Unknown device: {}".format(args.device))
    device_option = core.DeviceOption(
        getattr(caffe2_pb2, device_type_names[args.device])
    )
    print("Device option: {}, {}".format(args.device, device_option))
    pred_net.device_option.CopyFrom(device_option)
    for op in pred_net.op:
        op.device_option.CopyFrom(device_option)

    # Run the init net once, snapshot every blob it produced, add the random
    # inputs on top, and then clear the workspace so the blobs can be fed
    # back with the chosen device option.
    workspace.RunNetOnce(init_net)
    weights = {b: workspace.FetchBlob(b) for b in workspace.Blobs()}
    weights.update(external_inputs)
    workspace.ResetWorkspace()

    with core.DeviceScope(device_option):
        for name, blob in weights.items():
            workspace.FeedBlob(name, blob, device_option)
        workspace.CreateNet(pred_net)
        res = workspace.BenchmarkNet(pred_net.name,
                                     args.warmup_iterations,
                                     args.iterations,
                                     args.layer_wise_benchmark)
        # NOTE(review): res[0] is presumably the mean time per iteration in
        # milliseconds, which makes this samples per second -- confirm
        # against the BenchmarkNet documentation.
        print("FPS: {:.2f}".format(1/res[0]*1000*args.batch_size))
| |
|
if __name__ == '__main__':
    # Build the parser once and reuse it for both parsing and help output.
    parser = GetArgumentParser()
    # Unknown flags are tolerated and ignored.
    args, _ = parser.parse_known_args()
    if not (args.batch_size and args.model and args.order):
        # A required option is missing or empty: show usage and stop here
        # instead of falling through to the benchmark with unusable
        # arguments.
        parser.print_help()
        raise SystemExit(1)
    benchmark(args)
| |
|