| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | import sys |
| | import time |
| | import ncnn |
| |
|
# Directory prefix prepended to "<model>.param" when benchmark() loads a network.
param_root = "../benchmark/"

# Run configuration read by benchmark(); the __main__ section overrides these.
g_warmup_loop_count = 8  # untimed inference passes before measuring
g_loop_count = 4  # timed inference passes
g_enable_cooling_down = True  # sleep before each model's measurements

# Vulkan device and allocators; left as None until the __main__ section
# creates them for a GPU run.
g_vkdev = g_blob_vkallocator = g_staging_vkallocator = None

# CPU pool allocators, cleared at the start of every benchmark() call.
g_blob_pool_allocator = ncnn.UnlockedPoolAllocator()
g_workspace_pool_allocator = ncnn.PoolAllocator()
| |
|
| |
|
def benchmark(comment, _in, opt):
    """Time inference of one network and print min/max/avg latency in ms.

    Args:
        comment: Model name. ``param_root + comment + ".param"`` is loaded,
            and the name is echoed in the printed result line.
        _in: Input ``ncnn.Mat``; it is filled in place with 0.01.
        opt: ``ncnn.Option`` assigned to the network. Its
            ``use_vulkan_compute`` flag decides whether the Vulkan
            allocators are cleared and the Vulkan device is attached.

    Raises:
        ZeroDivisionError: if ``g_loop_count`` is 0 (the average is computed
            by dividing by it).
    """
    _in.fill(0.01)

    # Start every model from empty allocator pools.
    g_blob_pool_allocator.clear()
    g_workspace_pool_allocator.clear()

    if opt.use_vulkan_compute:
        g_blob_vkallocator.clear()
        g_staging_vkallocator.clear()

    net = ncnn.Net()
    net.opt = opt

    if net.opt.use_vulkan_compute:
        net.set_vulkan_device(g_vkdev)

    net.load_param(param_root + comment + ".param")

    # Model data comes from an empty data reader rather than a weights file.
    dr = ncnn.DataReaderFromEmpty()
    net.load_model(dr)

    input_names = net.input_names()
    output_names = net.output_names()

    if g_enable_cooling_down:
        time.sleep(10)

    # Warm-up passes: run but not timed. The extractor is used as a context
    # manager here, unlike in the timed loop below.
    for _ in range(g_warmup_loop_count):
        with net.create_extractor() as ex:
            ex.input(input_names[0], _in)
            ex.extract(output_names[0])

    time_min = sys.float_info.max
    time_max = -sys.float_info.max
    time_avg = 0.0

    for _ in range(g_loop_count):
        # perf_counter() is a monotonic, high-resolution clock meant for
        # measuring short durations; time.time() can jump when the system
        # clock is adjusted.
        start = time.perf_counter()

        # Extractor creation is inside the timed region on purpose, so each
        # sample covers create + input + extract.
        ex = net.create_extractor()
        ex.input(input_names[0], _in)
        ex.extract(output_names[0])

        end = time.perf_counter()

        timespan = end - start

        time_min = min(time_min, timespan)
        time_max = max(time_max, timespan)
        time_avg += timespan

    time_avg /= g_loop_count

    # Samples are in seconds; report milliseconds.
    print(
        "%20s min = %7.2f max = %7.2f avg = %7.2f"
        % (comment, time_min * 1000, time_max * 1000, time_avg * 1000)
    )
| |
|
| |
|
if __name__ == "__main__":
    # Positional command-line arguments, in order:
    #   loop_count num_threads powersave gpu_device cooling_down
    # Arguments that are omitted keep the defaults listed here.
    settings = [4, ncnn.get_cpu_count(), 0, -1, 1]
    overrides = [int(arg) for arg in sys.argv[1:6]]
    settings[: len(overrides)] = overrides
    loop_count, num_threads, powersave, gpu_device, cooling_down = settings

    # A gpu_device of -1 means no Vulkan device is used.
    use_vulkan_compute = gpu_device != -1

    # Publish the run configuration through the module globals that
    # benchmark() reads.
    g_enable_cooling_down = bool(cooling_down)
    g_loop_count = loop_count

    g_blob_pool_allocator.set_size_compare_ratio(0.0)
    g_workspace_pool_allocator.set_size_compare_ratio(0.5)

    if use_vulkan_compute:
        # GPU runs use a slightly longer warm-up than the default.
        g_warmup_loop_count = 10

        g_vkdev = ncnn.get_gpu_device(gpu_device)

        g_blob_vkallocator = ncnn.VkBlobAllocator(g_vkdev)
        g_staging_vkallocator = ncnn.VkStagingAllocator(g_vkdev)

    # One Option object is shared by every benchmarked model.
    opt = ncnn.Option()
    opt.lightmode = True
    opt.num_threads = num_threads
    opt.blob_allocator = g_blob_pool_allocator
    opt.workspace_allocator = g_workspace_pool_allocator
    if use_vulkan_compute:
        opt.blob_vkallocator = g_blob_vkallocator
        # The workspace allocator deliberately reuses the blob allocator.
        opt.workspace_vkallocator = g_blob_vkallocator
        opt.staging_vkallocator = g_staging_vkallocator
    for flag, enabled in (
        ("use_winograd_convolution", True),
        ("use_sgemm_convolution", True),
        ("use_int8_inference", True),
        ("use_vulkan_compute", use_vulkan_compute),
        ("use_fp16_packed", True),
        ("use_fp16_storage", True),
        ("use_fp16_arithmetic", True),
        ("use_int8_storage", True),
        ("use_int8_arithmetic", True),
        ("use_packing_layout", True),
        ("use_shader_pack8", False),
        ("use_image_storage", False),
    ):
        setattr(opt, flag, enabled)

    ncnn.set_cpu_powersave(powersave)
    ncnn.set_omp_dynamic(0)
    ncnn.set_omp_num_threads(num_threads)

    # Echo the effective configuration; powersave is read back from ncnn
    # rather than taken from the command line.
    for label, value in (
        ("loop_count =", loop_count),
        ("num_threads =", num_threads),
        ("powersave =", ncnn.get_cpu_powersave()),
        ("gpu_device =", gpu_device),
        ("cooling_down =", g_enable_cooling_down),
    ):
        print(label, value)

    # (model name, shape tuple handed to ncnn.Mat), benchmarked in this order.
    models = (
        ("squeezenet", (227, 227, 3)),
        ("squeezenet_int8", (227, 227, 3)),
        ("mobilenet", (224, 224, 3)),
        ("mobilenet_int8", (224, 224, 3)),
        ("mobilenet_v2", (224, 224, 3)),
        ("mobilenet_v3", (224, 224, 3)),
        ("shufflenet", (224, 224, 3)),
        ("shufflenet_v2", (224, 224, 3)),
        ("mnasnet", (224, 224, 3)),
        ("proxylessnasnet", (224, 224, 3)),
        ("efficientnet_b0", (224, 224, 3)),
        ("regnety_400m", (224, 224, 3)),
        ("blazeface", (128, 128, 3)),
        ("googlenet", (224, 224, 3)),
        ("googlenet_int8", (224, 224, 3)),
        ("resnet18", (224, 224, 3)),
        ("resnet18_int8", (224, 224, 3)),
        ("alexnet", (227, 227, 3)),
        ("vgg16", (224, 224, 3)),
        ("vgg16_int8", (224, 224, 3)),
        ("resnet50", (224, 224, 3)),
        ("resnet50_int8", (224, 224, 3)),
        ("squeezenet_ssd", (300, 300, 3)),
        ("squeezenet_ssd_int8", (300, 300, 3)),
        ("mobilenet_ssd", (300, 300, 3)),
        ("mobilenet_ssd_int8", (300, 300, 3)),
        ("mobilenet_yolo", (416, 416, 3)),
        ("mobilenetv2_yolov3", (352, 352, 3)),
        ("yolov4-tiny", (416, 416, 3)),
    )
    for model_name, input_shape in models:
        # A fresh input Mat is created for every model.
        benchmark(model_name, ncnn.Mat(input_shape), opt)
| |
|