| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | #include "net.h" |
| |
|
| | #include "cpu.h" |
| | #include "datareader.h" |
| | #include "layer_type.h" |
| | #include "modelbin.h" |
| | #include "paramdict.h" |
| |
|
| | #include <stdarg.h> |
| | #include <stdint.h> |
| | #include <string.h> |
| |
|
| | #if NCNN_BENCHMARK |
| | #include "benchmark.h" |
| | #endif |
| |
|
| | #if NCNN_VULKAN |
| | #include "command.h" |
| | #include "pipelinecache.h" |
| | #endif |
| |
|
| | namespace ncnn { |
| |
|
| | class NetPrivate |
| | { |
| | public: |
| | NetPrivate(Option& _opt); |
| |
|
| | Option& opt; |
| |
|
| | #if NCNN_VULKAN |
| |
|
| | int upload_model(); |
| |
|
| | #endif |
| |
|
| | friend class Extractor; |
| | int forward_layer(int layer_index, std::vector<Mat>& blob_mats, const Option& opt) const; |
| |
|
| | #if NCNN_VULKAN |
| | int forward_layer(int layer_index, std::vector<Mat>& blob_mats, std::vector<VkMat>& blob_mats_gpu, VkCompute& cmd, const Option& opt) const; |
| | int forward_layer(int layer_index, std::vector<Mat>& blob_mats, std::vector<VkMat>& blob_mats_gpu, std::vector<VkImageMat>& blob_mats_gpu_image, VkCompute& cmd, const Option& opt) const; |
| | #endif |
| |
|
| | int convert_layout(Mat& bottom_blob, const Layer* layer, const Option& opt) const; |
| |
|
| | int do_forward_layer(const Layer* layer, std::vector<Mat>& blob_mats, const Option& opt) const; |
| | #if NCNN_VULKAN |
| | int do_forward_layer(const Layer* layer, std::vector<VkMat>& blob_mats_gpu, VkCompute& cmd, const Option& opt) const; |
| | int do_forward_layer(const Layer* layer, std::vector<VkImageMat>& blob_mats_gpu_image, VkCompute& cmd, const Option& opt) const; |
| | #endif |
| |
|
| | void update_input_output_indexes(); |
| | #if NCNN_STRING |
| | void update_input_output_names(); |
| | #endif |
| |
|
| | std::vector<Blob> blobs; |
| | std::vector<Layer*> layers; |
| |
|
| | std::vector<int> input_blob_indexes; |
| | std::vector<int> output_blob_indexes; |
| | #if NCNN_STRING |
| | std::vector<const char*> input_blob_names; |
| | std::vector<const char*> output_blob_names; |
| | #endif |
| |
|
| | std::vector<custom_layer_registry_entry> custom_layer_registry; |
| | std::vector<overwrite_builtin_layer_registry_entry> overwrite_builtin_layer_registry; |
| |
|
| | PoolAllocator* local_blob_allocator; |
| | PoolAllocator* local_workspace_allocator; |
| |
|
| | #if NCNN_VULKAN |
| | const VulkanDevice* vkdev; |
| |
|
| | VkAllocator* weight_vkallocator; |
| | VkAllocator* weight_staging_vkallocator; |
| |
|
| | PipelineCache* pipeline_cache; |
| | #endif |
| | }; |
| |
|
| | NetPrivate::NetPrivate(Option& _opt) |
| | : opt(_opt) |
| | { |
| | local_blob_allocator = 0; |
| | local_workspace_allocator = 0; |
| |
|
| | #if NCNN_VULKAN |
| | vkdev = 0; |
| | weight_vkallocator = 0; |
| | weight_staging_vkallocator = 0; |
| | pipeline_cache = 0; |
| | #endif |
| | } |
| |
|
| | static Option get_masked_option(const Option& opt, int featmask) |
| | { |
| | |
| | Option opt1 = opt; |
| | opt1.use_fp16_arithmetic = opt1.use_fp16_arithmetic && !(featmask & (1 << 0)); |
| | opt1.use_fp16_storage = opt1.use_fp16_storage && !(featmask & (1 << 1)); |
| | opt1.use_fp16_packed = opt1.use_fp16_packed && !(featmask & (1 << 1)); |
| | opt1.use_bf16_storage = opt1.use_bf16_storage && !(featmask & (1 << 2)); |
| | opt1.use_int8_packed = opt1.use_int8_packed && !(featmask & (1 << 3)); |
| | opt1.use_int8_storage = opt1.use_int8_storage && !(featmask & (1 << 3)); |
| | opt1.use_int8_arithmetic = opt1.use_int8_arithmetic && !(featmask & (1 << 3)); |
| | opt1.use_vulkan_compute = opt1.use_vulkan_compute && !(featmask & (1 << 4)); |
| | opt1.use_image_storage = opt1.use_image_storage && !(featmask & (1 << 4)); |
| | opt1.use_tensor_storage = opt1.use_tensor_storage && !(featmask & (1 << 4)); |
| | opt1.use_sgemm_convolution = opt1.use_sgemm_convolution && !(featmask & (1 << 5)); |
| | opt1.use_winograd_convolution = opt1.use_winograd_convolution && !(featmask & (1 << 6)); |
| |
|
| | return opt1; |
| | } |
| |
|
| | #if NCNN_VULKAN |
| | int NetPrivate::upload_model() |
| | { |
| | ncnn::VkTransfer cmd(vkdev); |
| |
|
| | |
| | if (!weight_vkallocator) |
| | { |
| | weight_vkallocator = new VkWeightAllocator(vkdev); |
| | } |
| | if (!weight_staging_vkallocator) |
| | { |
| | weight_staging_vkallocator = new VkWeightStagingAllocator(vkdev); |
| | } |
| |
|
| | Option opt_upload = opt; |
| | opt_upload.blob_vkallocator = weight_vkallocator; |
| | opt_upload.workspace_vkallocator = weight_vkallocator; |
| | opt_upload.staging_vkallocator = weight_staging_vkallocator; |
| |
|
| | for (size_t i = 0; i < layers.size(); i++) |
| | { |
| | if (layers[i]->support_vulkan) |
| | { |
| | int uret = layers[i]->upload_model(cmd, get_masked_option(opt_upload, layers[i]->featmask)); |
| | if (uret != 0) |
| | { |
| | NCNN_LOGE("layer upload_model %d failed", (int)i); |
| | return -1; |
| | } |
| | } |
| | } |
| |
|
| | return cmd.submit_and_wait(); |
| | } |
| | #endif |
| |
|
| | int NetPrivate::forward_layer(int layer_index, std::vector<Mat>& blob_mats, const Option& opt) const |
| | { |
| | const Layer* layer = layers[layer_index]; |
| |
|
| | |
| |
|
| | |
| | for (size_t i = 0; i < layer->bottoms.size(); i++) |
| | { |
| | int bottom_blob_index = layer->bottoms[i]; |
| |
|
| | if (blob_mats[bottom_blob_index].dims == 0) |
| | { |
| | int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, opt); |
| | if (ret != 0) |
| | return ret; |
| | } |
| | } |
| |
|
| | #if NCNN_BENCHMARK |
| | double start = get_current_time(); |
| | Mat bottom_blob; |
| | if (layer->one_blob_only) |
| | { |
| | int bottom_blob_index = layer->bottoms[0]; |
| | bottom_blob.dims = blob_mats[bottom_blob_index].dims; |
| | bottom_blob.w = blob_mats[bottom_blob_index].w; |
| | bottom_blob.h = blob_mats[bottom_blob_index].h; |
| | bottom_blob.c = blob_mats[bottom_blob_index].c; |
| | bottom_blob.elempack = blob_mats[bottom_blob_index].elempack; |
| | bottom_blob.elemsize = blob_mats[bottom_blob_index].elemsize; |
| | } |
| | #endif |
| | int ret = 0; |
| | if (layer->featmask) |
| | { |
| | ret = do_forward_layer(layer, blob_mats, get_masked_option(opt, layer->featmask)); |
| | } |
| | else |
| | { |
| | ret = do_forward_layer(layer, blob_mats, opt); |
| | } |
| | #if NCNN_BENCHMARK |
| | double end = get_current_time(); |
| | if (layer->one_blob_only) |
| | { |
| | int top_blob_index = layer->tops[0]; |
| | benchmark(layer, bottom_blob, blob_mats[top_blob_index], start, end); |
| | } |
| | else |
| | { |
| | benchmark(layer, start, end); |
| | } |
| | #endif |
| | if (ret != 0) |
| | return ret; |
| |
|
| | |
| | |
| | |
| |
|
| | return 0; |
| | } |
| |
|
| | #if NCNN_VULKAN |
| | int NetPrivate::forward_layer(int layer_index, std::vector<Mat>& blob_mats, std::vector<VkMat>& blob_mats_gpu, VkCompute& cmd, const Option& opt) const |
| | { |
| | const Layer* layer = layers[layer_index]; |
| |
|
| | |
| |
|
| | bool cmd_submit_and_wait = false; |
| |
|
| | |
| | for (size_t i = 0; i < layer->bottoms.size(); i++) |
| | { |
| | int bottom_blob_index = layer->bottoms[i]; |
| |
|
| | if (blob_mats_gpu[bottom_blob_index].dims == 0 && blob_mats[bottom_blob_index].dims == 0) |
| | { |
| | int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, blob_mats_gpu, cmd, opt); |
| | if (ret != 0) |
| | return ret; |
| | } |
| |
|
| | if (layer->support_vulkan) |
| | { |
| | if (blob_mats_gpu[bottom_blob_index].dims == 0) |
| | { |
| | |
| | cmd.record_upload(blob_mats[bottom_blob_index], blob_mats_gpu[bottom_blob_index], opt); |
| |
|
| | if (opt.lightmode) |
| | { |
| | |
| | blob_mats[bottom_blob_index].release(); |
| | } |
| | } |
| | } |
| | else |
| | { |
| | if (blob_mats[bottom_blob_index].dims == 0) |
| | { |
| | Option opt_download = opt; |
| | opt_download.use_packing_layout = layer->support_packing; |
| |
|
| | |
| | cmd.record_download(blob_mats_gpu[bottom_blob_index], blob_mats[bottom_blob_index], opt_download); |
| |
|
| | if (opt.lightmode) |
| | { |
| | |
| | blob_mats_gpu[bottom_blob_index].release(); |
| | } |
| |
|
| | cmd_submit_and_wait = true; |
| | } |
| | } |
| | } |
| |
|
| | int ret; |
| | if (cmd_submit_and_wait) |
| | { |
| | ret = cmd.submit_and_wait(); |
| |
|
| | #if NCNN_BENCHMARK |
| | std::vector<uint64_t> results(layer_index * 2); |
| | cmd.get_query_pool_results(0, layer_index * 2, results); |
| | for (int i = 0; i < layer_index; i++) |
| | { |
| | uint64_t start = results[i * 2]; |
| | uint64_t end = results[i * 2 + 1]; |
| | if (start == 0 || end == 0) |
| | continue; |
| |
|
| | double duration_us = (end - start) * vkdev->info.timestamp_period() / 1000; |
| | NCNN_LOGE("%-24s %-30s %8.2lfus |", layers[i]->type.c_str(), layers[i]->name.c_str(), duration_us); |
| | } |
| | #endif |
| |
|
| | cmd.reset(); |
| | if (ret != 0) |
| | return ret; |
| | } |
| |
|
| | if (layer->support_vulkan) |
| | { |
| | #if NCNN_BENCHMARK |
| | cmd.record_write_timestamp(layer_index * 2); |
| | #endif |
| | if (layer->featmask) |
| | { |
| | ret = do_forward_layer(layer, blob_mats_gpu, cmd, get_masked_option(opt, layer->featmask)); |
| | } |
| | else |
| | { |
| | ret = do_forward_layer(layer, blob_mats_gpu, cmd, opt); |
| | } |
| | #if NCNN_BENCHMARK |
| | cmd.record_write_timestamp(layer_index * 2 + 1); |
| | #endif |
| | } |
| | else |
| | { |
| | #if NCNN_BENCHMARK |
| | double start = get_current_time(); |
| | Mat bottom_blob; |
| | if (layer->one_blob_only) |
| | { |
| | int bottom_blob_index = layer->bottoms[0]; |
| | bottom_blob = blob_mats[bottom_blob_index].shape(); |
| | } |
| | #endif |
| | if (layer->featmask) |
| | { |
| | ret = do_forward_layer(layer, blob_mats, get_masked_option(opt, layer->featmask)); |
| | } |
| | else |
| | { |
| | ret = do_forward_layer(layer, blob_mats, opt); |
| | } |
| | #if NCNN_BENCHMARK |
| | double end = get_current_time(); |
| | if (layer->one_blob_only) |
| | { |
| | int top_blob_index = layer->tops[0]; |
| | benchmark(layer, bottom_blob, blob_mats[top_blob_index], start, end); |
| | } |
| | else |
| | { |
| | benchmark(layer, start, end); |
| | } |
| | #endif |
| | } |
| | if (ret != 0) |
| | return ret; |
| |
|
| | |
| |
|
| | return 0; |
| | } |
| |
|
| | int NetPrivate::forward_layer(int layer_index, std::vector<Mat>& blob_mats, std::vector<VkMat>& blob_mats_gpu, std::vector<VkImageMat>& blob_mats_gpu_image, VkCompute& cmd, const Option& opt) const |
| | { |
| | const Layer* layer = layers[layer_index]; |
| |
|
| | |
| |
|
| | bool cmd_submit_and_wait = false; |
| | bool image_allocation_failed = false; |
| |
|
| | IMAGE_ALLOCATION_FAILED: |
| |
|
| | if (image_allocation_failed) |
| | { |
| | #if NCNN_STRING |
| | NCNN_LOGE("forward_layer %d %s image allocation failed, fallback to cpu", layer_index, layer->name.c_str()); |
| | #else |
| | NCNN_LOGE("forward_layer %d image allocation failed, fallback to cpu", layer_index); |
| | #endif |
| | } |
| |
|
| | |
| | for (size_t i = 0; i < layer->bottoms.size(); i++) |
| | { |
| | int bottom_blob_index = layer->bottoms[i]; |
| |
|
| | if (blob_mats_gpu_image[bottom_blob_index].dims == 0 && blob_mats_gpu[bottom_blob_index].dims == 0 && blob_mats[bottom_blob_index].dims == 0) |
| | { |
| | int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, blob_mats_gpu, blob_mats_gpu_image, cmd, opt); |
| | if (ret != 0) |
| | return ret; |
| | } |
| |
|
| | if (layer->support_vulkan && !image_allocation_failed) |
| | { |
| | if (layer->support_image_storage) |
| | { |
| | if (blob_mats_gpu_image[bottom_blob_index].dims == 0) |
| | { |
| | if (blob_mats_gpu[bottom_blob_index].dims == 0) |
| | { |
| | |
| | cmd.record_upload(blob_mats[bottom_blob_index], blob_mats_gpu_image[bottom_blob_index], opt); |
| |
|
| | if (blob_mats_gpu_image[bottom_blob_index].empty()) |
| | { |
| | image_allocation_failed = true; |
| | goto IMAGE_ALLOCATION_FAILED; |
| | } |
| |
|
| | if (opt.lightmode) |
| | { |
| | |
| | blob_mats[bottom_blob_index].release(); |
| | } |
| | } |
| | else |
| | { |
| | |
| | cmd.record_buffer_to_image(blob_mats_gpu[bottom_blob_index], blob_mats_gpu_image[bottom_blob_index], opt); |
| |
|
| | if (blob_mats_gpu_image[bottom_blob_index].empty()) |
| | { |
| | image_allocation_failed = true; |
| | goto IMAGE_ALLOCATION_FAILED; |
| | } |
| |
|
| | if (opt.lightmode) |
| | { |
| | |
| | blob_mats_gpu[bottom_blob_index].release(); |
| | } |
| | } |
| | } |
| | } |
| | else |
| | { |
| | if (blob_mats_gpu[bottom_blob_index].dims == 0) |
| | { |
| | if (blob_mats_gpu_image[bottom_blob_index].dims == 0) |
| | { |
| | |
| | cmd.record_upload(blob_mats[bottom_blob_index], blob_mats_gpu[bottom_blob_index], opt); |
| |
|
| | if (opt.lightmode) |
| | { |
| | |
| | blob_mats[bottom_blob_index].release(); |
| | } |
| | } |
| | else |
| | { |
| | |
| | cmd.record_image_to_buffer(blob_mats_gpu_image[bottom_blob_index], blob_mats_gpu[bottom_blob_index], opt); |
| |
|
| | if (opt.lightmode) |
| | { |
| | |
| | blob_mats_gpu_image[bottom_blob_index].release(); |
| | } |
| | } |
| | } |
| | } |
| | } |
| | else |
| | { |
| | if (blob_mats[bottom_blob_index].dims == 0) |
| | { |
| | if (blob_mats_gpu_image[bottom_blob_index].dims == 0) |
| | { |
| | |
| | cmd.record_download(blob_mats_gpu[bottom_blob_index], blob_mats[bottom_blob_index], opt); |
| |
|
| | if (opt.lightmode) |
| | { |
| | |
| | blob_mats_gpu[bottom_blob_index].release(); |
| | } |
| |
|
| | cmd_submit_and_wait = true; |
| | } |
| | else |
| | { |
| | |
| | cmd.record_download(blob_mats_gpu_image[bottom_blob_index], blob_mats[bottom_blob_index], opt); |
| |
|
| | if (opt.lightmode) |
| | { |
| | |
| | blob_mats_gpu_image[bottom_blob_index].release(); |
| | } |
| |
|
| | cmd_submit_and_wait = true; |
| | } |
| | } |
| | } |
| | } |
| |
|
| | int ret; |
| | if (cmd_submit_and_wait) |
| | { |
| | ret = cmd.submit_and_wait(); |
| |
|
| | #if NCNN_BENCHMARK |
| | std::vector<uint64_t> results(layer_index * 2); |
| | cmd.get_query_pool_results(0, layer_index * 2, results); |
| | for (int i = 0; i < layer_index; i++) |
| | { |
| | uint64_t start = results[i * 2]; |
| | uint64_t end = results[i * 2 + 1]; |
| | if (start == 0 || end == 0) |
| | continue; |
| |
|
| | double duration_us = (end - start) * vkdev->info.timestamp_period() / 1000; |
| | NCNN_LOGE("%-24s %-30s %8.2lfus |", layers[i]->type.c_str(), layers[i]->name.c_str(), duration_us); |
| | } |
| | #endif |
| |
|
| | cmd.reset(); |
| |
|
| | if (ret != 0) |
| | return ret; |
| | } |
| |
|
| | if (layer->support_vulkan && !image_allocation_failed) |
| | { |
| | #if NCNN_BENCHMARK |
| | cmd.record_write_timestamp(layer_index * 2); |
| | #endif |
| | if (layer->support_image_storage) |
| | { |
| | if (layer->featmask) |
| | { |
| | ret = do_forward_layer(layer, blob_mats_gpu_image, cmd, get_masked_option(opt, layer->featmask)); |
| | } |
| | else |
| | { |
| | ret = do_forward_layer(layer, blob_mats_gpu_image, cmd, opt); |
| | } |
| | if (ret == -100) |
| | { |
| | image_allocation_failed = true; |
| | goto IMAGE_ALLOCATION_FAILED; |
| | } |
| | } |
| | else |
| | { |
| | if (layer->featmask) |
| | { |
| | ret = do_forward_layer(layer, blob_mats_gpu, cmd, get_masked_option(opt, layer->featmask)); |
| | } |
| | else |
| | { |
| | ret = do_forward_layer(layer, blob_mats_gpu, cmd, opt); |
| | } |
| | } |
| | #if NCNN_BENCHMARK |
| | cmd.record_write_timestamp(layer_index * 2 + 1); |
| | #endif |
| | } |
| | else |
| | { |
| | #if NCNN_BENCHMARK |
| | double start = get_current_time(); |
| | Mat bottom_blob; |
| | if (layer->one_blob_only) |
| | { |
| | int bottom_blob_index = layer->bottoms[0]; |
| | bottom_blob = blob_mats[bottom_blob_index].shape(); |
| | } |
| | #endif |
| | if (layer->featmask) |
| | { |
| | ret = do_forward_layer(layer, blob_mats, get_masked_option(opt, layer->featmask)); |
| | } |
| | else |
| | { |
| | ret = do_forward_layer(layer, blob_mats, opt); |
| | } |
| | #if NCNN_BENCHMARK |
| | double end = get_current_time(); |
| | if (layer->one_blob_only) |
| | { |
| | int top_blob_index = layer->tops[0]; |
| | benchmark(layer, bottom_blob, blob_mats[top_blob_index], start, end); |
| | } |
| | else |
| | { |
| | benchmark(layer, start, end); |
| | } |
| | #endif |
| | } |
| | if (ret != 0) |
| | return ret; |
| |
|
| | |
| |
|
| | return 0; |
| | } |
| | #endif |
| |
|
| | int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Option& opt) const |
| | { |
| | |
| | |
| | #if NCNN_ARM82 |
| | if (opt.use_fp16_storage && cpu_support_arm_asimdhp()) |
| | { |
| | if (bottom_blob.elembits() == 32 && layer->support_fp16_storage) |
| | { |
| | Mat bottom_blob_fp16; |
| | cast_float32_to_float16(bottom_blob, bottom_blob_fp16, opt); |
| | bottom_blob = bottom_blob_fp16; |
| | } |
| | if (bottom_blob.elembits() == 16 && !layer->support_fp16_storage) |
| | { |
| | Mat bottom_blob_fp32; |
| | cast_float16_to_float32(bottom_blob, bottom_blob_fp32, opt); |
| | bottom_blob = bottom_blob_fp32; |
| | } |
| | } |
| | else |
| | #endif |
| | #if NCNN_RVV |
| | if (opt.use_fp16_storage && cpu_support_riscv_v() && cpu_support_riscv_zfh()) |
| | { |
| | if (bottom_blob.elembits() == 32 && layer->support_fp16_storage) |
| | { |
| | Mat bottom_blob_fp16; |
| | cast_float32_to_float16(bottom_blob, bottom_blob_fp16, opt); |
| | bottom_blob = bottom_blob_fp16; |
| | } |
| | if (bottom_blob.elembits() == 16 && !layer->support_fp16_storage) |
| | { |
| | Mat bottom_blob_fp32; |
| | cast_float16_to_float32(bottom_blob, bottom_blob_fp32, opt); |
| | bottom_blob = bottom_blob_fp32; |
| | } |
| | } |
| | else |
| | #endif |
| | #if NCNN_BF16 |
| | if (opt.use_bf16_storage) |
| | { |
| | if (bottom_blob.elembits() == 32 && layer->support_bf16_storage) |
| | { |
| | Mat bottom_blob_bf16; |
| | cast_float32_to_bfloat16(bottom_blob, bottom_blob_bf16, opt); |
| | bottom_blob = bottom_blob_bf16; |
| | } |
| | if (bottom_blob.elembits() == 16 && !layer->support_bf16_storage) |
| | { |
| | Mat bottom_blob_fp32; |
| | cast_bfloat16_to_float32(bottom_blob, bottom_blob_fp32, opt); |
| | bottom_blob = bottom_blob_fp32; |
| | } |
| | } |
| | else |
| | #endif |
| | { |
| | |
| | } |
| | |
| | |
| |
|
| | int dst_elempack = 1; |
| | if (opt.use_packing_layout) |
| | { |
| | |
| | int dims = bottom_blob.dims; |
| | int elemcount = 0; |
| | if (dims == 1) elemcount = bottom_blob.elempack * bottom_blob.w; |
| | if (dims == 2) elemcount = bottom_blob.elempack * bottom_blob.h; |
| | if (dims == 3 || dims == 4) elemcount = bottom_blob.elempack * bottom_blob.c; |
| |
|
| | int elembits = bottom_blob.elembits(); |
| |
|
| | if (layer->support_packing) |
| | { |
| | if (elembits == 32) |
| | { |
| | #if NCNN_AVX512 |
| | if (elemcount % 16 == 0 && ncnn::cpu_support_x86_avx512()) |
| | dst_elempack = 16; |
| | else if (elemcount % 8 == 0 && ncnn::cpu_support_x86_avx()) |
| | dst_elempack = 8; |
| | else if (elemcount % 4 == 0) |
| | dst_elempack = 4; |
| | #elif NCNN_AVX |
| | if (elemcount % 8 == 0 && ncnn::cpu_support_x86_avx()) |
| | dst_elempack = 8; |
| | else if (elemcount % 4 == 0) |
| | dst_elempack = 4; |
| | #elif NCNN_RVV |
| | const int packn = ncnn::cpu_riscv_vlenb() / 4; |
| | if (elemcount % packn == 0) |
| | dst_elempack = packn; |
| | #else |
| | if (elemcount % 4 == 0) |
| | dst_elempack = 4; |
| | #endif |
| | } |
| | if (elembits == 16) |
| | { |
| | #if NCNN_ARM82 |
| | if (elemcount % 8 == 0 && ncnn::cpu_support_arm_asimdhp() && opt.use_fp16_arithmetic) |
| | dst_elempack = 8; |
| | else if (elemcount % 4 == 0) |
| | dst_elempack = 4; |
| | #elif NCNN_RVV |
| | const int packn = ncnn::cpu_riscv_vlenb() / 2; |
| | if (elemcount % packn == 0) |
| | dst_elempack = packn; |
| | #else |
| | if (elemcount % 4 == 0) |
| | dst_elempack = 4; |
| | #endif |
| | } |
| | if (elembits == 8) |
| | { |
| | #if NCNN_RVV |
| | const int packn = ncnn::cpu_riscv_vlenb() / 1; |
| | if (elemcount % packn == 0) |
| | dst_elempack = packn; |
| | #else |
| | if (elemcount % 8 == 0) |
| | dst_elempack = 8; |
| | #endif |
| | } |
| | } |
| | } |
| |
|
| | if (bottom_blob.elempack != dst_elempack) |
| | { |
| | Mat bottom_blob_packed; |
| | convert_packing(bottom_blob, bottom_blob_packed, dst_elempack, opt); |
| | bottom_blob = bottom_blob_packed; |
| | } |
| |
|
| | return 0; |
| | } |
| |
|
| | int NetPrivate::do_forward_layer(const Layer* layer, std::vector<Mat>& blob_mats, const Option& opt) const |
| | { |
| | if (layer->one_blob_only) |
| | { |
| | int bottom_blob_index = layer->bottoms[0]; |
| | int top_blob_index = layer->tops[0]; |
| |
|
| | Mat& bottom_blob_ref = blob_mats[bottom_blob_index]; |
| | Mat bottom_blob; |
| |
|
| | if (opt.lightmode) |
| | { |
| | |
| | if (layer->support_inplace && *bottom_blob_ref.refcount != 1) |
| | { |
| | bottom_blob = bottom_blob_ref.clone(opt.blob_allocator); |
| | } |
| | } |
| | if (bottom_blob.dims == 0) |
| | { |
| | bottom_blob = bottom_blob_ref; |
| | } |
| |
|
| | convert_layout(bottom_blob, layer, opt); |
| |
|
| | |
| | if (opt.lightmode && layer->support_inplace) |
| | { |
| | Mat& bottom_top_blob = bottom_blob; |
| | int ret = layer->forward_inplace(bottom_top_blob, opt); |
| | if (ret != 0) |
| | return ret; |
| |
|
| | |
| | blob_mats[top_blob_index] = bottom_top_blob; |
| | } |
| | else |
| | { |
| | Mat top_blob; |
| | int ret = layer->forward(bottom_blob, top_blob, opt); |
| | if (ret != 0) |
| | return ret; |
| |
|
| | |
| | blob_mats[top_blob_index] = top_blob; |
| | } |
| |
|
| | if (opt.lightmode) |
| | { |
| | |
| | blob_mats[bottom_blob_index].release(); |
| | } |
| | } |
| | else |
| | { |
| | std::vector<Mat> bottom_blobs(layer->bottoms.size()); |
| | for (size_t i = 0; i < layer->bottoms.size(); i++) |
| | { |
| | int bottom_blob_index = layer->bottoms[i]; |
| |
|
| | Mat& bottom_blob_ref = blob_mats[bottom_blob_index]; |
| | bottom_blobs[i].release(); |
| |
|
| | if (opt.lightmode) |
| | { |
| | |
| | if (layer->support_inplace && *bottom_blob_ref.refcount != 1) |
| | { |
| | bottom_blobs[i] = bottom_blob_ref.clone(opt.blob_allocator); |
| | } |
| | } |
| | if (bottom_blobs[i].dims == 0) |
| | { |
| | bottom_blobs[i] = bottom_blob_ref; |
| | } |
| |
|
| | convert_layout(bottom_blobs[i], layer, opt); |
| | } |
| |
|
| | |
| | if (opt.lightmode && layer->support_inplace) |
| | { |
| | std::vector<Mat>& bottom_top_blobs = bottom_blobs; |
| | int ret = layer->forward_inplace(bottom_top_blobs, opt); |
| | if (ret != 0) |
| | return ret; |
| |
|
| | |
| | for (size_t i = 0; i < layer->tops.size(); i++) |
| | { |
| | int top_blob_index = layer->tops[i]; |
| |
|
| | blob_mats[top_blob_index] = bottom_top_blobs[i]; |
| | } |
| | } |
| | else |
| | { |
| | std::vector<Mat> top_blobs(layer->tops.size()); |
| | int ret = layer->forward(bottom_blobs, top_blobs, opt); |
| | if (ret != 0) |
| | return ret; |
| |
|
| | |
| | for (size_t i = 0; i < layer->tops.size(); i++) |
| | { |
| | int top_blob_index = layer->tops[i]; |
| |
|
| | blob_mats[top_blob_index] = top_blobs[i]; |
| | } |
| | } |
| |
|
| | if (opt.lightmode) |
| | { |
| | for (size_t i = 0; i < layer->bottoms.size(); i++) |
| | { |
| | int bottom_blob_index = layer->bottoms[i]; |
| |
|
| | |
| | blob_mats[bottom_blob_index].release(); |
| | } |
| | } |
| | } |
| |
|
| | return 0; |
| | } |
| |
|
| | #if NCNN_VULKAN |
| | int NetPrivate::do_forward_layer(const Layer* layer, std::vector<VkMat>& blob_mats_gpu, VkCompute& cmd, const Option& opt) const |
| | { |
| | if (layer->one_blob_only) |
| | { |
| | |
| | int bottom_blob_index = layer->bottoms[0]; |
| | int top_blob_index = layer->tops[0]; |
| |
|
| | VkMat& bottom_blob_ref = blob_mats_gpu[bottom_blob_index]; |
| | VkMat bottom_blob; |
| |
|
| | if (opt.lightmode) |
| | { |
| | |
| | if (layer->support_inplace && *bottom_blob_ref.refcount != 1) |
| | { |
| | cmd.record_clone(bottom_blob_ref, bottom_blob, opt); |
| | |
| | } |
| | } |
| | if (bottom_blob.dims == 0) |
| | { |
| | bottom_blob = bottom_blob_ref; |
| | } |
| |
|
| | |
| | if (opt.lightmode && layer->support_inplace) |
| | { |
| | VkMat& bottom_top_blob = bottom_blob; |
| | int ret = layer->forward_inplace(bottom_top_blob, cmd, opt); |
| | if (ret != 0) |
| | return ret; |
| |
|
| | |
| | blob_mats_gpu[top_blob_index] = bottom_top_blob; |
| | } |
| | else |
| | { |
| | VkMat top_blob; |
| | int ret = layer->forward(bottom_blob, top_blob, cmd, opt); |
| | if (ret != 0) |
| | return ret; |
| |
|
| | |
| | blob_mats_gpu[top_blob_index] = top_blob; |
| | } |
| |
|
| | if (opt.lightmode) |
| | { |
| | |
| | blob_mats_gpu[bottom_blob_index].release(); |
| | } |
| | } |
| | else |
| | { |
| | |
| | std::vector<VkMat> bottom_blobs(layer->bottoms.size()); |
| | for (size_t i = 0; i < layer->bottoms.size(); i++) |
| | { |
| | int bottom_blob_index = layer->bottoms[i]; |
| |
|
| | VkMat& bottom_blob_ref = blob_mats_gpu[bottom_blob_index]; |
| | bottom_blobs[i].release(); |
| |
|
| | if (opt.lightmode) |
| | { |
| | |
| | if (layer->support_inplace && *bottom_blob_ref.refcount != 1) |
| | { |
| | cmd.record_clone(bottom_blob_ref, bottom_blobs[i], opt); |
| | |
| | } |
| | } |
| | if (bottom_blobs[i].dims == 0) |
| | { |
| | bottom_blobs[i] = bottom_blob_ref; |
| | } |
| | } |
| |
|
| | |
| | if (opt.lightmode && layer->support_inplace) |
| | { |
| | std::vector<VkMat>& bottom_top_blobs = bottom_blobs; |
| | int ret = layer->forward_inplace(bottom_top_blobs, cmd, opt); |
| | if (ret != 0) |
| | return ret; |
| |
|
| | |
| | for (size_t i = 0; i < layer->tops.size(); i++) |
| | { |
| | int top_blob_index = layer->tops[i]; |
| |
|
| | blob_mats_gpu[top_blob_index] = bottom_top_blobs[i]; |
| | } |
| | } |
| | else |
| | { |
| | std::vector<VkMat> top_blobs(layer->tops.size()); |
| | int ret = layer->forward(bottom_blobs, top_blobs, cmd, opt); |
| | if (ret != 0) |
| | return ret; |
| |
|
| | |
| | for (size_t i = 0; i < layer->tops.size(); i++) |
| | { |
| | int top_blob_index = layer->tops[i]; |
| |
|
| | blob_mats_gpu[top_blob_index] = top_blobs[i]; |
| | } |
| | } |
| |
|
| | if (opt.lightmode) |
| | { |
| | for (size_t i = 0; i < layer->bottoms.size(); i++) |
| | { |
| | int bottom_blob_index = layer->bottoms[i]; |
| |
|
| | |
| | blob_mats_gpu[bottom_blob_index].release(); |
| | } |
| | } |
| | } |
| |
|
| | return 0; |
| | } |
| |
|
| | int NetPrivate::do_forward_layer(const Layer* layer, std::vector<VkImageMat>& blob_mats_gpu_image, VkCompute& cmd, const Option& opt) const |
| | { |
| | if (layer->one_blob_only) |
| | { |
| | |
| | int bottom_blob_index = layer->bottoms[0]; |
| | int top_blob_index = layer->tops[0]; |
| |
|
| | VkImageMat& bottom_blob_ref = blob_mats_gpu_image[bottom_blob_index]; |
| | VkImageMat bottom_blob; |
| |
|
| | if (opt.lightmode) |
| | { |
| | |
| | if (layer->support_inplace && *bottom_blob_ref.refcount != 1) |
| | { |
| | cmd.record_clone(bottom_blob_ref, bottom_blob, opt); |
| | |
| | } |
| | } |
| | if (bottom_blob.dims == 0) |
| | { |
| | bottom_blob = bottom_blob_ref; |
| | } |
| |
|
| | |
| | if (opt.lightmode && layer->support_inplace) |
| | { |
| | VkImageMat& bottom_top_blob = bottom_blob; |
| | int ret = layer->forward_inplace(bottom_top_blob, cmd, opt); |
| | if (ret != 0) |
| | return ret; |
| |
|
| | |
| | blob_mats_gpu_image[top_blob_index] = bottom_top_blob; |
| | } |
| | else |
| | { |
| | VkImageMat top_blob; |
| | int ret = layer->forward(bottom_blob, top_blob, cmd, opt); |
| | if (ret != 0) |
| | return ret; |
| |
|
| | |
| | blob_mats_gpu_image[top_blob_index] = top_blob; |
| | } |
| |
|
| | if (opt.lightmode) |
| | { |
| | |
| | blob_mats_gpu_image[bottom_blob_index].release(); |
| | } |
| | } |
| | else |
| | { |
| | |
| | std::vector<VkImageMat> bottom_blobs(layer->bottoms.size()); |
| | for (size_t i = 0; i < layer->bottoms.size(); i++) |
| | { |
| | int bottom_blob_index = layer->bottoms[i]; |
| |
|
| | VkImageMat& bottom_blob_ref = blob_mats_gpu_image[bottom_blob_index]; |
| |
|
| | if (opt.lightmode) |
| | { |
| | |
| | if (layer->support_inplace && *bottom_blob_ref.refcount != 1) |
| | { |
| | cmd.record_clone(bottom_blob_ref, bottom_blobs[i], opt); |
| | |
| | } |
| | } |
| | if (bottom_blobs[i].dims == 0) |
| | { |
| | bottom_blobs[i] = bottom_blob_ref; |
| | } |
| | } |
| |
|
| | |
| | if (opt.lightmode && layer->support_inplace) |
| | { |
| | std::vector<VkImageMat>& bottom_top_blobs = bottom_blobs; |
| | int ret = layer->forward_inplace(bottom_top_blobs, cmd, opt); |
| | if (ret != 0) |
| | return ret; |
| |
|
| | |
| | for (size_t i = 0; i < layer->tops.size(); i++) |
| | { |
| | int top_blob_index = layer->tops[i]; |
| |
|
| | blob_mats_gpu_image[top_blob_index] = bottom_top_blobs[i]; |
| | } |
| | } |
| | else |
| | { |
| | std::vector<VkImageMat> top_blobs(layer->tops.size()); |
| | int ret = layer->forward(bottom_blobs, top_blobs, cmd, opt); |
| | if (ret != 0) |
| | return ret; |
| |
|
| | |
| | for (size_t i = 0; i < layer->tops.size(); i++) |
| | { |
| | int top_blob_index = layer->tops[i]; |
| |
|
| | blob_mats_gpu_image[top_blob_index] = top_blobs[i]; |
| | } |
| | } |
| |
|
| | if (opt.lightmode) |
| | { |
| | for (size_t i = 0; i < layer->bottoms.size(); i++) |
| | { |
| | int bottom_blob_index = layer->bottoms[i]; |
| |
|
| | |
| | blob_mats_gpu_image[bottom_blob_index].release(); |
| | } |
| | } |
| | } |
| |
|
| | return 0; |
| | } |
| | #endif |
| |
|
| | void NetPrivate::update_input_output_indexes() |
| | { |
| | input_blob_indexes.clear(); |
| | output_blob_indexes.clear(); |
| |
|
| | for (size_t i = 0; i < layers.size(); i++) |
| | { |
| | if (layers[i]->typeindex == LayerType::Input) |
| | { |
| | int blob_index = layers[i]->tops[0]; |
| | input_blob_indexes.push_back(blob_index); |
| | } |
| | } |
| |
|
| | for (size_t i = 0; i < blobs.size(); i++) |
| | { |
| | if (blobs[i].producer != -1 && blobs[i].consumer == -1) |
| | { |
| | output_blob_indexes.push_back(i); |
| | } |
| | } |
| | } |
| |
|
| | #if NCNN_STRING |
| | void NetPrivate::update_input_output_names() |
| | { |
| | input_blob_names.clear(); |
| | output_blob_names.clear(); |
| |
|
| | for (size_t i = 0; i < input_blob_indexes.size(); i++) |
| | { |
| | int blob_index = input_blob_indexes[i]; |
| | input_blob_names.push_back(blobs[blob_index].name.c_str()); |
| | } |
| |
|
| | for (size_t i = 0; i < output_blob_indexes.size(); i++) |
| | { |
| | int blob_index = output_blob_indexes[i]; |
| | output_blob_names.push_back(blobs[blob_index].name.c_str()); |
| | } |
| | } |
| | #endif |
| |
|
| | Net::Net() |
| | : d(new NetPrivate(opt)) |
| | { |
| | } |
| |
|
| | Net::~Net() |
| | { |
| | clear(); |
| |
|
| | delete d; |
| | } |
| |
|
| | Net::Net(const Net&) |
| | : d(0) |
| | { |
| | } |
| |
|
| | Net& Net::operator=(const Net&) |
| | { |
| | return *this; |
| | } |
| |
|
| | #if NCNN_STRING |
| | int Net::register_custom_layer(const char* type, layer_creator_func creator, layer_destroyer_func destroyer, void* userdata) |
| | { |
| | int typeindex = layer_to_index(type); |
| | if (typeindex != -1) |
| | { |
| | NCNN_LOGE("overwrite built-in layer type %s", type); |
| |
|
| | for (size_t i = 0; i < d->overwrite_builtin_layer_registry.size(); i++) |
| | { |
| | if (d->overwrite_builtin_layer_registry[i].typeindex == typeindex) |
| | { |
| | NCNN_LOGE("overwrite existing overwritten built-in layer index %d", typeindex); |
| |
|
| | d->overwrite_builtin_layer_registry[i].creator = creator; |
| | d->overwrite_builtin_layer_registry[i].destroyer = destroyer; |
| | d->overwrite_builtin_layer_registry[i].userdata = userdata; |
| | return 0; |
| | } |
| | } |
| |
|
| | struct overwrite_builtin_layer_registry_entry entry = {typeindex, creator, destroyer, userdata}; |
| | d->overwrite_builtin_layer_registry.push_back(entry); |
| | return 0; |
| | } |
| |
|
| | int custom_index = custom_layer_to_index(type); |
| | if (custom_index == -1) |
| | { |
| | struct custom_layer_registry_entry entry = {type, creator, destroyer, userdata}; |
| | d->custom_layer_registry.push_back(entry); |
| | } |
| | else |
| | { |
| | NCNN_LOGE("overwrite existing custom layer type %s", type); |
| | d->custom_layer_registry[custom_index].name = type; |
| | d->custom_layer_registry[custom_index].creator = creator; |
| | d->custom_layer_registry[custom_index].destroyer = destroyer; |
| | d->custom_layer_registry[custom_index].userdata = userdata; |
| | } |
| |
|
| | return 0; |
| | } |
| | #endif |
| |
|
| | int Net::register_custom_layer(int index, layer_creator_func creator, layer_destroyer_func destroyer, void* userdata) |
| | { |
| | int custom_index = index & ~LayerType::CustomBit; |
| | if (index == custom_index) |
| | { |
| | NCNN_LOGE("overwrite built-in layer type %d", index); |
| |
|
| | for (size_t i = 0; i < d->overwrite_builtin_layer_registry.size(); i++) |
| | { |
| | if (d->overwrite_builtin_layer_registry[i].typeindex == index) |
| | { |
| | NCNN_LOGE("overwrite existing overwritten built-in layer index %d", index); |
| |
|
| | d->overwrite_builtin_layer_registry[i].creator = creator; |
| | d->overwrite_builtin_layer_registry[i].destroyer = destroyer; |
| | d->overwrite_builtin_layer_registry[i].userdata = userdata; |
| | return 0; |
| | } |
| | } |
| |
|
| | struct overwrite_builtin_layer_registry_entry entry = {index, creator, destroyer, userdata}; |
| | d->overwrite_builtin_layer_registry.push_back(entry); |
| | return 0; |
| | } |
| |
|
| | if ((int)d->custom_layer_registry.size() <= custom_index) |
| | { |
| | #if NCNN_STRING |
| | struct custom_layer_registry_entry dummy = {"", 0, 0, 0}; |
| | #else |
| | struct custom_layer_registry_entry dummy = {0, 0, 0}; |
| | #endif |
| | d->custom_layer_registry.resize(custom_index + 1, dummy); |
| | } |
| |
|
| | if (d->custom_layer_registry[custom_index].creator) |
| | { |
| | NCNN_LOGE("overwrite existing custom layer index %d", custom_index); |
| | } |
| |
|
| | d->custom_layer_registry[custom_index].creator = creator; |
| | d->custom_layer_registry[custom_index].destroyer = destroyer; |
| | d->custom_layer_registry[custom_index].userdata = userdata; |
| | return 0; |
| | } |
| |
|
| | #if NCNN_STRING |
| | int Net::load_param(const DataReader& dr) |
| | { |
| | #define SCAN_VALUE(fmt, v) \ |
| | if (dr.scan(fmt, &v) != 1) \ |
| | { \ |
| | NCNN_LOGE("parse " #v " failed"); \ |
| | return -1; \ |
| | } |
| |
|
| | int magic = 0; |
| | SCAN_VALUE("%d", magic) |
| | if (magic != 7767517) |
| | { |
| | NCNN_LOGE("param is too old, please regenerate"); |
| | return -1; |
| | } |
| |
|
| | |
| | int layer_count = 0; |
| | int blob_count = 0; |
| | SCAN_VALUE("%d", layer_count) |
| | SCAN_VALUE("%d", blob_count) |
| | if (layer_count <= 0 || blob_count <= 0) |
| | { |
| | NCNN_LOGE("invalid layer_count or blob_count"); |
| | return -1; |
| | } |
| |
|
| | d->layers.resize((size_t)layer_count); |
| | d->blobs.resize((size_t)blob_count); |
| |
|
| | #if NCNN_VULKAN |
| | |
| | if (opt.use_bf16_storage) |
| | opt.use_vulkan_compute = false; |
| |
|
| | if (opt.use_vulkan_compute) |
| | { |
| | if (!d->vkdev) d->vkdev = get_gpu_device(); |
| | if (!d->vkdev) opt.use_vulkan_compute = false; |
| | } |
| | if (opt.use_vulkan_compute) |
| | { |
| | |
| | if (!d->vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false; |
| | if (!d->vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false; |
| | if (!d->vkdev->info.support_fp16_arithmetic()) opt.use_fp16_arithmetic = false; |
| | if (!d->vkdev->info.support_int8_storage()) opt.use_int8_storage = false; |
| | if (!d->vkdev->info.support_int8_arithmetic()) opt.use_int8_arithmetic = false; |
| | if (!d->vkdev->info.support_cooperative_matrix()) opt.use_cooperative_matrix = false; |
| |
|
| | if (d->vkdev->info.bug_buffer_image_load_zero()) opt.use_image_storage = false; |
| |
|
| | |
| | if (d->vkdev->info.type() != 0) opt.use_shader_local_memory = false; |
| |
|
| | |
| | if (!opt.use_fp16_packed && !opt.use_fp16_storage) opt.use_fp16_arithmetic = false; |
| | } |
| | else |
| | { |
| | |
| | if (!opt.use_fp16_storage) opt.use_fp16_arithmetic = false; |
| | } |
| | #endif |
| |
|
| | ParamDict pd; |
| |
|
| | int blob_index = 0; |
| | for (int i = 0; i < layer_count; i++) |
| | { |
| | char layer_type[256]; |
| | char layer_name[256]; |
| | int bottom_count = 0; |
| | int top_count = 0; |
| | SCAN_VALUE("%255s", layer_type) |
| | SCAN_VALUE("%255s", layer_name) |
| | SCAN_VALUE("%d", bottom_count) |
| | SCAN_VALUE("%d", top_count) |
| |
|
| | Layer* layer = create_overwrite_builtin_layer(layer_type); |
| | if (!layer) |
| | { |
| | layer = create_layer(layer_type); |
| | } |
| | if (!layer) |
| | { |
| | layer = create_custom_layer(layer_type); |
| | } |
| | if (!layer) |
| | { |
| | NCNN_LOGE("layer %s not exists or registered", layer_type); |
| | clear(); |
| | return -1; |
| | } |
| |
|
| | #if NCNN_VULKAN |
| | if (opt.use_vulkan_compute) |
| | layer->vkdev = d->vkdev; |
| | #endif |
| |
|
| | layer->type = std::string(layer_type); |
| | layer->name = std::string(layer_name); |
| | |
| |
|
| | layer->bottoms.resize(bottom_count); |
| |
|
| | for (int j = 0; j < bottom_count; j++) |
| | { |
| | char bottom_name[256]; |
| | SCAN_VALUE("%255s", bottom_name) |
| |
|
| | int bottom_blob_index = find_blob_index_by_name(bottom_name); |
| | if (bottom_blob_index == -1) |
| | { |
| | Blob& blob = d->blobs[blob_index]; |
| |
|
| | bottom_blob_index = blob_index; |
| |
|
| | blob.name = std::string(bottom_name); |
| | |
| |
|
| | blob_index++; |
| | } |
| |
|
| | Blob& blob = d->blobs[bottom_blob_index]; |
| |
|
| | blob.consumer = i; |
| |
|
| | layer->bottoms[j] = bottom_blob_index; |
| | } |
| |
|
| | layer->tops.resize(top_count); |
| | for (int j = 0; j < top_count; j++) |
| | { |
| | Blob& blob = d->blobs[blob_index]; |
| |
|
| | char blob_name[256]; |
| | SCAN_VALUE("%255s", blob_name) |
| |
|
| | blob.name = std::string(blob_name); |
| | |
| |
|
| | blob.producer = i; |
| |
|
| | layer->tops[j] = blob_index; |
| |
|
| | blob_index++; |
| | } |
| |
|
| | |
| | int pdlr = pd.load_param(dr); |
| | if (pdlr != 0) |
| | { |
| | NCNN_LOGE("ParamDict load_param %d %s failed", i, layer->name.c_str()); |
| | continue; |
| | } |
| |
|
| | if (layer->support_int8_storage) |
| | { |
| | |
| | opt.use_vulkan_compute = false; |
| | } |
| |
|
| | |
| | Mat shape_hints = pd.get(30, Mat()); |
| | if (!shape_hints.empty()) |
| | { |
| | const int* psh = shape_hints; |
| | for (int j = 0; j < top_count; j++) |
| | { |
| | Blob& blob = d->blobs[layer->tops[j]]; |
| |
|
| | int dims = psh[0]; |
| | if (dims == 1) |
| | { |
| | blob.shape = Mat(psh[1], (void*)0, 4u, 1); |
| | } |
| | if (dims == 2) |
| | { |
| | blob.shape = Mat(psh[1], psh[2], (void*)0, 4u, 1); |
| | } |
| | if (dims == 3) |
| | { |
| | blob.shape = Mat(psh[1], psh[2], psh[3], (void*)0, 4u, 1); |
| | } |
| |
|
| | psh += 4; |
| | } |
| | } |
| |
|
| | |
| | layer->bottom_shapes.resize(bottom_count); |
| | for (int j = 0; j < bottom_count; j++) |
| | { |
| | layer->bottom_shapes[j] = d->blobs[layer->bottoms[j]].shape; |
| | } |
| |
|
| | layer->top_shapes.resize(top_count); |
| | for (int j = 0; j < top_count; j++) |
| | { |
| | layer->top_shapes[j] = d->blobs[layer->tops[j]].shape; |
| | } |
| |
|
| | |
| | layer->featmask = pd.get(31, 0); |
| |
|
| | int lr = layer->load_param(pd); |
| | if (lr != 0) |
| | { |
| | NCNN_LOGE("layer load_param %d %s failed", i, layer->name.c_str()); |
| | continue; |
| | } |
| |
|
| | d->layers[i] = layer; |
| | } |
| |
|
| | d->update_input_output_indexes(); |
| | d->update_input_output_names(); |
| |
|
| | #undef SCAN_VALUE |
| | return 0; |
| | } |
| | #endif |
| |
|
| | int Net::load_param_bin(const DataReader& dr) |
| | { |
| | #define READ_VALUE(buf) \ |
| | if (dr.read(&buf, sizeof(buf)) != sizeof(buf)) \ |
| | { \ |
| | NCNN_LOGE("read " #buf " failed"); \ |
| | return -1; \ |
| | } |
| |
|
| | int magic = 0; |
| | READ_VALUE(magic) |
| | if (magic != 7767517) |
| | { |
| | NCNN_LOGE("param is too old, please regenerate"); |
| | return -1; |
| | } |
| |
|
| | int layer_count = 0; |
| | int blob_count = 0; |
| | READ_VALUE(layer_count) |
| | READ_VALUE(blob_count) |
| | if (layer_count <= 0 || blob_count <= 0) |
| | { |
| | NCNN_LOGE("invalid layer_count or blob_count"); |
| | return -1; |
| | } |
| |
|
| | d->layers.resize(layer_count); |
| | d->blobs.resize(blob_count); |
| |
|
| | #if NCNN_VULKAN |
| | |
| | if (opt.use_bf16_storage) |
| | opt.use_vulkan_compute = false; |
| |
|
| | if (opt.use_vulkan_compute) |
| | { |
| | if (!d->vkdev) d->vkdev = get_gpu_device(); |
| | if (!d->vkdev) opt.use_vulkan_compute = false; |
| | } |
| | if (opt.use_vulkan_compute) |
| | { |
| | |
| | if (!d->vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false; |
| | if (!d->vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false; |
| | if (!d->vkdev->info.support_fp16_arithmetic()) opt.use_fp16_arithmetic = false; |
| | if (!d->vkdev->info.support_int8_storage()) opt.use_int8_storage = false; |
| | if (!d->vkdev->info.support_int8_arithmetic()) opt.use_int8_arithmetic = false; |
| | if (!d->vkdev->info.support_cooperative_matrix()) opt.use_cooperative_matrix = false; |
| |
|
| | if (d->vkdev->info.bug_buffer_image_load_zero()) opt.use_image_storage = false; |
| |
|
| | |
| | if (d->vkdev->info.type() != 0) opt.use_shader_local_memory = false; |
| |
|
| | |
| | if (!opt.use_fp16_packed && !opt.use_fp16_storage) opt.use_fp16_arithmetic = false; |
| | } |
| | else |
| | { |
| | |
| | if (!opt.use_fp16_storage) opt.use_fp16_arithmetic = false; |
| | } |
| | #endif |
| |
|
| | ParamDict pd; |
| |
|
| | for (int i = 0; i < layer_count; i++) |
| | { |
| | int typeindex; |
| | int bottom_count; |
| | int top_count; |
| | READ_VALUE(typeindex) |
| | READ_VALUE(bottom_count) |
| | READ_VALUE(top_count) |
| |
|
| | Layer* layer = create_overwrite_builtin_layer(typeindex); |
| | if (!layer) |
| | { |
| | layer = create_layer(typeindex); |
| | } |
| | if (!layer) |
| | { |
| | int custom_index = typeindex & ~LayerType::CustomBit; |
| | layer = create_custom_layer(custom_index); |
| | } |
| | if (!layer) |
| | { |
| | NCNN_LOGE("layer %d not exists or registered", typeindex); |
| | clear(); |
| | return -1; |
| | } |
| |
|
| | #if NCNN_VULKAN |
| | if (opt.use_vulkan_compute) |
| | layer->vkdev = d->vkdev; |
| | #endif |
| |
|
| | |
| | |
| | |
| |
|
| | layer->bottoms.resize(bottom_count); |
| | for (int j = 0; j < bottom_count; j++) |
| | { |
| | int bottom_blob_index; |
| | READ_VALUE(bottom_blob_index) |
| |
|
| | Blob& blob = d->blobs[bottom_blob_index]; |
| |
|
| | blob.consumer = i; |
| |
|
| | layer->bottoms[j] = bottom_blob_index; |
| | } |
| |
|
| | layer->tops.resize(top_count); |
| | for (int j = 0; j < top_count; j++) |
| | { |
| | int top_blob_index; |
| | READ_VALUE(top_blob_index) |
| |
|
| | Blob& blob = d->blobs[top_blob_index]; |
| |
|
| | |
| | |
| |
|
| | blob.producer = i; |
| |
|
| | layer->tops[j] = top_blob_index; |
| | } |
| |
|
| | |
| | int pdlr = pd.load_param_bin(dr); |
| | if (pdlr != 0) |
| | { |
| | #if NCNN_STRING |
| | NCNN_LOGE("ParamDict load_param %d %s failed", i, layer->name.c_str()); |
| | #else |
| | NCNN_LOGE("ParamDict load_param %d failed", i); |
| | #endif |
| | continue; |
| | } |
| |
|
| | if (layer->support_int8_storage) |
| | { |
| | |
| | opt.use_vulkan_compute = false; |
| | } |
| |
|
| | |
| | Mat shape_hints = pd.get(30, Mat()); |
| | if (!shape_hints.empty()) |
| | { |
| | const int* psh = shape_hints; |
| | for (int j = 0; j < top_count; j++) |
| | { |
| | Blob& blob = d->blobs[layer->tops[j]]; |
| |
|
| | int dims = psh[0]; |
| | if (dims == 1) |
| | { |
| | blob.shape = Mat(psh[1], (void*)0, 4u, 1); |
| | } |
| | if (dims == 2) |
| | { |
| | blob.shape = Mat(psh[1], psh[2], (void*)0, 4u, 1); |
| | } |
| | if (dims == 3) |
| | { |
| | blob.shape = Mat(psh[1], psh[2], psh[3], (void*)0, 4u, 1); |
| | } |
| |
|
| | psh += 4; |
| | } |
| | } |
| |
|
| | |
| | layer->bottom_shapes.resize(bottom_count); |
| | for (int j = 0; j < bottom_count; j++) |
| | { |
| | layer->bottom_shapes[j] = d->blobs[layer->bottoms[j]].shape; |
| | } |
| |
|
| | layer->top_shapes.resize(top_count); |
| | for (int j = 0; j < top_count; j++) |
| | { |
| | layer->top_shapes[j] = d->blobs[layer->tops[j]].shape; |
| | } |
| |
|
| | |
| | layer->featmask = pd.get(31, 0); |
| |
|
| | int lr = layer->load_param(pd); |
| | if (lr != 0) |
| | { |
| | #if NCNN_STRING |
| | NCNN_LOGE("layer load_param %d %s failed", i, layer->name.c_str()); |
| | #else |
| | NCNN_LOGE("layer load_param %d failed", i); |
| | #endif |
| | continue; |
| | } |
| |
|
| | d->layers[i] = layer; |
| | } |
| |
|
| | d->update_input_output_indexes(); |
| |
|
| | #undef READ_VALUE |
| | return 0; |
| | } |
| |
|
| | int Net::load_model(const DataReader& dr) |
| | { |
| | if (d->layers.empty()) |
| | { |
| | NCNN_LOGE("network graph not ready"); |
| | return -1; |
| | } |
| |
|
| | int layer_count = (int)d->layers.size(); |
| |
|
| | |
| | int ret = 0; |
| |
|
| | #if NCNN_VULKAN |
| | if (opt.use_vulkan_compute) |
| | { |
| | if (!opt.pipeline_cache) |
| | { |
| | if (!d->pipeline_cache) |
| | d->pipeline_cache = new PipelineCache(d->vkdev); |
| | opt.pipeline_cache = d->pipeline_cache; |
| | } |
| | } |
| | #endif |
| |
|
| | ModelBinFromDataReader mb(dr); |
| | for (int i = 0; i < layer_count; i++) |
| | { |
| | Layer* layer = d->layers[i]; |
| |
|
| | |
| | if (!layer) |
| | { |
| | NCNN_LOGE("load_model error at layer %d, parameter file has inconsistent content.", i); |
| | ret = -1; |
| | break; |
| | } |
| |
|
| | int lret = layer->load_model(mb); |
| | if (lret != 0) |
| | { |
| | #if NCNN_STRING |
| | NCNN_LOGE("layer load_model %d %s failed", i, layer->name.c_str()); |
| | #else |
| | NCNN_LOGE("layer load_model %d failed", i); |
| | #endif |
| | ret = -1; |
| | break; |
| | } |
| |
|
| | if (layer->support_int8_storage) |
| | { |
| | |
| | opt.use_vulkan_compute = false; |
| | } |
| |
|
| | Option opt1 = get_masked_option(opt, layer->featmask); |
| | #if NCNN_VULKAN |
| | if (opt1.use_vulkan_compute) |
| | { |
| | if (!layer->support_image_storage) opt1.use_image_storage = false; |
| | } |
| | else |
| | { |
| | layer->vkdev = 0; |
| | layer->support_vulkan = false; |
| | } |
| | #endif |
| |
|
| | int cret = layer->create_pipeline(opt1); |
| | if (cret != 0) |
| | { |
| | #if NCNN_STRING |
| | NCNN_LOGE("layer create_pipeline %d %s failed", i, layer->name.c_str()); |
| | #else |
| | NCNN_LOGE("layer create_pipeline %d failed", i); |
| | #endif |
| | ret = -1; |
| | break; |
| | } |
| | } |
| |
|
| | if (opt.use_local_pool_allocator) |
| | { |
| | if (opt.blob_allocator == 0) |
| | { |
| | if (!d->local_blob_allocator) |
| | { |
| | d->local_blob_allocator = new PoolAllocator; |
| | d->local_blob_allocator->set_size_compare_ratio(0.f); |
| | } |
| | } |
| | if (opt.workspace_allocator == 0) |
| | { |
| | if (!d->local_workspace_allocator) |
| | { |
| | d->local_workspace_allocator = new PoolAllocator; |
| | d->local_workspace_allocator->set_size_compare_ratio(0.f); |
| | } |
| | } |
| | } |
| |
|
| | #if NCNN_VULKAN |
| | if (ret == 0 && opt.use_vulkan_compute) |
| | { |
| | ret = d->upload_model(); |
| | } |
| | #endif |
| |
|
| | return ret; |
| | } |
| |
|
| | #if NCNN_STDIO |
| | #if NCNN_STRING |
| | int Net::load_param(FILE* fp) |
| | { |
| | DataReaderFromStdio dr(fp); |
| | return load_param(dr); |
| | } |
| |
|
| | int Net::load_param_mem(const char* _mem) |
| | { |
| | const unsigned char* mem = (const unsigned char*)_mem; |
| | DataReaderFromMemory dr(mem); |
| | return load_param(dr); |
| | } |
| |
|
| | int Net::load_param(const char* protopath) |
| | { |
| | FILE* fp = fopen(protopath, "rb"); |
| | if (!fp) |
| | { |
| | NCNN_LOGE("fopen %s failed", protopath); |
| | return -1; |
| | } |
| |
|
| | int ret = load_param(fp); |
| | fclose(fp); |
| | return ret; |
| | } |
| | #endif |
| |
|
| | int Net::load_param_bin(FILE* fp) |
| | { |
| | DataReaderFromStdio dr(fp); |
| | return load_param_bin(dr); |
| | } |
| |
|
| | int Net::load_param_bin(const char* protopath) |
| | { |
| | FILE* fp = fopen(protopath, "rb"); |
| | if (!fp) |
| | { |
| | NCNN_LOGE("fopen %s failed", protopath); |
| | return -1; |
| | } |
| |
|
| | int ret = load_param_bin(fp); |
| | fclose(fp); |
| | return ret; |
| | } |
| |
|
| | int Net::load_model(FILE* fp) |
| | { |
| | DataReaderFromStdio dr(fp); |
| | return load_model(dr); |
| | } |
| |
|
| | int Net::load_model(const char* modelpath) |
| | { |
| | FILE* fp = fopen(modelpath, "rb"); |
| | if (!fp) |
| | { |
| | NCNN_LOGE("fopen %s failed", modelpath); |
| | return -1; |
| | } |
| |
|
| | int ret = load_model(fp); |
| | fclose(fp); |
| | return ret; |
| | } |
| | #endif |
| |
|
| | int Net::load_param(const unsigned char* _mem) |
| | { |
| | const unsigned char* mem = _mem; |
| | DataReaderFromMemory dr(mem); |
| | load_param_bin(dr); |
| | return static_cast<int>(mem - _mem); |
| | } |
| |
|
| | int Net::load_model(const unsigned char* _mem) |
| | { |
| | const unsigned char* mem = _mem; |
| | DataReaderFromMemory dr(mem); |
| | load_model(dr); |
| | return static_cast<int>(mem - _mem); |
| | } |
| |
|
| | #if NCNN_PLATFORM_API |
| | #if __ANDROID_API__ >= 9 |
| | #if NCNN_STRING |
| | int Net::load_param(AAsset* asset) |
| | { |
| | DataReaderFromAndroidAsset dr(asset); |
| | return load_param(dr); |
| | } |
| |
|
| | int Net::load_param(AAssetManager* mgr, const char* assetpath) |
| | { |
| | AAsset* asset = AAssetManager_open(mgr, assetpath, AASSET_MODE_BUFFER); |
| | if (!asset) |
| | { |
| | NCNN_LOGE("AAssetManager_open %s failed", assetpath); |
| | return -1; |
| | } |
| |
|
| | int ret = load_param(asset); |
| | AAsset_close(asset); |
| | return ret; |
| | } |
| | #endif |
| |
|
| | int Net::load_param_bin(AAsset* asset) |
| | { |
| | DataReaderFromAndroidAsset dr(asset); |
| | return load_param_bin(dr); |
| | } |
| |
|
| | int Net::load_param_bin(AAssetManager* mgr, const char* assetpath) |
| | { |
| | AAsset* asset = AAssetManager_open(mgr, assetpath, AASSET_MODE_BUFFER); |
| | if (!asset) |
| | { |
| | NCNN_LOGE("AAssetManager_open %s failed", assetpath); |
| | return -1; |
| | } |
| |
|
| | int ret = load_param_bin(asset); |
| | AAsset_close(asset); |
| | return ret; |
| | } |
| |
|
| | int Net::load_model(AAsset* asset) |
| | { |
| | DataReaderFromAndroidAsset dr(asset); |
| | return load_model(dr); |
| | } |
| |
|
| | int Net::load_model(AAssetManager* mgr, const char* assetpath) |
| | { |
| | AAsset* asset = AAssetManager_open(mgr, assetpath, AASSET_MODE_STREAMING); |
| | if (!asset) |
| | { |
| | NCNN_LOGE("AAssetManager_open %s failed", assetpath); |
| | return -1; |
| | } |
| |
|
| | int ret = load_model(asset); |
| | AAsset_close(asset); |
| | return ret; |
| | } |
| | #endif |
| | #endif |
| |
|
| | void Net::clear() |
| | { |
| | d->blobs.clear(); |
| | for (size_t i = 0; i < d->layers.size(); i++) |
| | { |
| | Layer* layer = d->layers[i]; |
| |
|
| | Option opt1 = get_masked_option(opt, layer->featmask); |
| | #if NCNN_VULKAN |
| | if (!layer->support_image_storage) |
| | { |
| | opt1.use_image_storage = false; |
| | } |
| | #endif |
| |
|
| | int dret = layer->destroy_pipeline(opt1); |
| | if (dret != 0) |
| | { |
| | NCNN_LOGE("layer destroy_pipeline failed"); |
| | |
| | } |
| |
|
| | if (layer->typeindex & ncnn::LayerType::CustomBit) |
| | { |
| | int custom_index = layer->typeindex & ~ncnn::LayerType::CustomBit; |
| | if (d->custom_layer_registry[custom_index].destroyer) |
| | { |
| | d->custom_layer_registry[custom_index].destroyer(layer, d->custom_layer_registry[custom_index].userdata); |
| | } |
| | else |
| | { |
| | delete layer; |
| | } |
| | } |
| | else |
| | { |
| | |
| | int index = -1; |
| | const size_t overwrite_builtin_layer_registry_entry_count = d->overwrite_builtin_layer_registry.size(); |
| | for (size_t i = 0; i < overwrite_builtin_layer_registry_entry_count; i++) |
| | { |
| | if (d->overwrite_builtin_layer_registry[i].typeindex == layer->typeindex) |
| | { |
| | index = i; |
| | break; |
| | } |
| | } |
| |
|
| | if (index != -1 && d->overwrite_builtin_layer_registry[index].destroyer) |
| | { |
| | d->overwrite_builtin_layer_registry[index].destroyer(layer, d->overwrite_builtin_layer_registry[index].userdata); |
| | } |
| | else |
| | { |
| | delete layer; |
| | } |
| | } |
| | } |
| | d->layers.clear(); |
| |
|
| | if (d->local_blob_allocator) |
| | { |
| | delete d->local_blob_allocator; |
| | d->local_blob_allocator = 0; |
| | } |
| | if (d->local_workspace_allocator) |
| | { |
| | delete d->local_workspace_allocator; |
| | d->local_workspace_allocator = 0; |
| | } |
| |
|
| | #if NCNN_VULKAN |
| | if (d->weight_vkallocator) |
| | { |
| | delete d->weight_vkallocator; |
| | d->weight_vkallocator = 0; |
| | } |
| | if (d->weight_staging_vkallocator) |
| | { |
| | delete d->weight_staging_vkallocator; |
| | d->weight_staging_vkallocator = 0; |
| | } |
| | if (d->pipeline_cache) |
| | { |
| | delete d->pipeline_cache; |
| | d->pipeline_cache = 0; |
| | opt.pipeline_cache = 0; |
| | } |
| | #endif |
| | } |
| |
|
| | Extractor Net::create_extractor() const |
| | { |
| | return Extractor(this, d->blobs.size()); |
| | } |
| |
|
| | const std::vector<int>& Net::input_indexes() const |
| | { |
| | return d->input_blob_indexes; |
| | } |
| |
|
| | const std::vector<int>& Net::output_indexes() const |
| | { |
| | return d->output_blob_indexes; |
| | } |
| |
|
| | #if NCNN_STRING |
| | const std::vector<const char*>& Net::input_names() const |
| | { |
| | return d->input_blob_names; |
| | } |
| |
|
| | const std::vector<const char*>& Net::output_names() const |
| | { |
| | return d->output_blob_names; |
| | } |
| | #endif |
| |
|
| | const std::vector<Blob>& Net::blobs() const |
| | { |
| | return d->blobs; |
| | } |
| |
|
| | const std::vector<Layer*>& Net::layers() const |
| | { |
| | return d->layers; |
| | } |
| |
|
| | std::vector<Blob>& Net::mutable_blobs() |
| | { |
| | return d->blobs; |
| | } |
| |
|
| | std::vector<Layer*>& Net::mutable_layers() |
| | { |
| | return d->layers; |
| | } |
| |
|
| | #if NCNN_VULKAN |
| | void Net::set_vulkan_device(int device_index) |
| | { |
| | d->vkdev = get_gpu_device(device_index); |
| | } |
| |
|
| | void Net::set_vulkan_device(const VulkanDevice* _vkdev) |
| | { |
| | d->vkdev = _vkdev; |
| | } |
| |
|
| | const VulkanDevice* Net::vulkan_device() const |
| | { |
| | return d->vkdev; |
| | } |
| | #endif |
| |
|
| | #if NCNN_STRING |
| | int Net::find_blob_index_by_name(const char* name) const |
| | { |
| | for (size_t i = 0; i < d->blobs.size(); i++) |
| | { |
| | const Blob& blob = d->blobs[i]; |
| | if (blob.name == name) |
| | { |
| | return static_cast<int>(i); |
| | } |
| | } |
| |
|
| | NCNN_LOGE("find_blob_index_by_name %s failed", name); |
| | return -1; |
| | } |
| |
|
| | int Net::find_layer_index_by_name(const char* name) const |
| | { |
| | for (size_t i = 0; i < d->layers.size(); i++) |
| | { |
| | const Layer* layer = d->layers[i]; |
| | if (layer->name == name) |
| | { |
| | return static_cast<int>(i); |
| | } |
| | } |
| |
|
| | NCNN_LOGE("find_layer_index_by_name %s failed", name); |
| | return -1; |
| | } |
| |
|
| | int Net::custom_layer_to_index(const char* type) |
| | { |
| | const size_t custom_layer_registry_entry_count = d->custom_layer_registry.size(); |
| | for (size_t i = 0; i < custom_layer_registry_entry_count; i++) |
| | { |
| | if (strcmp(type, d->custom_layer_registry[i].name) == 0) |
| | return static_cast<int>(i); |
| | } |
| |
|
| | return -1; |
| | } |
| |
|
| | Layer* Net::create_custom_layer(const char* type) |
| | { |
| | int index = custom_layer_to_index(type); |
| | if (index == -1) |
| | return 0; |
| |
|
| | return create_custom_layer(index); |
| | } |
| |
|
| | Layer* Net::create_overwrite_builtin_layer(const char* type) |
| | { |
| | int typeindex = layer_to_index(type); |
| | if (typeindex == -1) |
| | return 0; |
| |
|
| | return create_overwrite_builtin_layer(typeindex); |
| | } |
| | #endif |
| |
|
| | Layer* Net::create_custom_layer(int index) |
| | { |
| | const size_t custom_layer_registry_entry_count = d->custom_layer_registry.size(); |
| | if (index < 0 || static_cast<unsigned int>(index) >= custom_layer_registry_entry_count) |
| | return 0; |
| |
|
| | layer_creator_func layer_creator = d->custom_layer_registry[index].creator; |
| | if (!layer_creator) |
| | return 0; |
| |
|
| | Layer* layer = layer_creator(d->custom_layer_registry[index].userdata); |
| | layer->typeindex = ncnn::LayerType::CustomBit | index; |
| | return layer; |
| | } |
| |
|
| | Layer* Net::create_overwrite_builtin_layer(int typeindex) |
| | { |
| | int index = -1; |
| | const size_t overwrite_builtin_layer_registry_entry_count = d->overwrite_builtin_layer_registry.size(); |
| | for (size_t i = 0; i < overwrite_builtin_layer_registry_entry_count; i++) |
| | { |
| | if (d->overwrite_builtin_layer_registry[i].typeindex == typeindex) |
| | { |
| | index = i; |
| | break; |
| | } |
| | } |
| |
|
| | if (index == -1) |
| | return 0; |
| |
|
| | layer_creator_func layer_creator = d->overwrite_builtin_layer_registry[index].creator; |
| | if (!layer_creator) |
| | return 0; |
| |
|
| | Layer* layer = layer_creator(d->overwrite_builtin_layer_registry[index].userdata); |
| | layer->typeindex = typeindex; |
| | return layer; |
| | } |
| |
|
| | class ExtractorPrivate |
| | { |
| | public: |
| | ExtractorPrivate(const Net* _net) |
| | : net(_net) |
| | { |
| | } |
| | const Net* net; |
| | std::vector<Mat> blob_mats; |
| | Option opt; |
| |
|
| | #if NCNN_VULKAN |
| | VkAllocator* local_blob_vkallocator; |
| | VkAllocator* local_staging_vkallocator; |
| |
|
| | std::vector<VkMat> blob_mats_gpu; |
| | std::vector<VkImageMat> blob_mats_gpu_image; |
| | #endif |
| | }; |
| |
|
| | Extractor::Extractor(const Net* _net, size_t blob_count) |
| | : d(new ExtractorPrivate(_net)) |
| | { |
| | d->blob_mats.resize(blob_count); |
| | d->opt = d->net->opt; |
| |
|
| | #if NCNN_VULKAN |
| | if (d->net->opt.use_vulkan_compute) |
| | { |
| | d->local_blob_vkallocator = 0; |
| | d->local_staging_vkallocator = 0; |
| |
|
| | d->blob_mats_gpu.resize(blob_count); |
| | d->blob_mats_gpu_image.resize(blob_count); |
| | } |
| | #endif |
| | } |
| |
|
| | Extractor::~Extractor() |
| | { |
| | clear(); |
| |
|
| | delete d; |
| | } |
| |
|
| | Extractor::Extractor(const Extractor& rhs) |
| | : d(new ExtractorPrivate(0)) |
| | { |
| | d->net = rhs.d->net; |
| | d->blob_mats = rhs.d->blob_mats; |
| | d->opt = rhs.d->opt; |
| |
|
| | #if NCNN_VULKAN |
| | d->local_blob_vkallocator = 0; |
| | d->local_staging_vkallocator = 0; |
| |
|
| | d->blob_mats_gpu = rhs.d->blob_mats_gpu; |
| | d->blob_mats_gpu_image = rhs.d->blob_mats_gpu_image; |
| | #endif |
| | } |
| |
|
| | Extractor& Extractor::operator=(const Extractor& rhs) |
| | { |
| | if (this == &rhs) |
| | return *this; |
| |
|
| | d->net = rhs.d->net; |
| | d->blob_mats = rhs.d->blob_mats; |
| | d->opt = rhs.d->opt; |
| |
|
| | #if NCNN_VULKAN |
| | d->local_blob_vkallocator = 0; |
| | d->local_staging_vkallocator = 0; |
| |
|
| | d->blob_mats_gpu = rhs.d->blob_mats_gpu; |
| | d->blob_mats_gpu_image = rhs.d->blob_mats_gpu_image; |
| | #endif |
| |
|
| | return *this; |
| | } |
| |
|
| | void Extractor::clear() |
| | { |
| | d->blob_mats.clear(); |
| |
|
| | #if NCNN_VULKAN |
| | if (d->opt.use_vulkan_compute) |
| | { |
| | d->blob_mats_gpu.clear(); |
| | d->blob_mats_gpu_image.clear(); |
| |
|
| | if (d->local_blob_vkallocator) |
| | { |
| | d->net->vulkan_device()->reclaim_blob_allocator(d->local_blob_vkallocator); |
| | } |
| | if (d->local_staging_vkallocator) |
| | { |
| | d->net->vulkan_device()->reclaim_staging_allocator(d->local_staging_vkallocator); |
| | } |
| | } |
| | #endif |
| | } |
| |
|
| | void Extractor::set_light_mode(bool enable) |
| | { |
| | d->opt.lightmode = enable; |
| | } |
| |
|
| | void Extractor::set_num_threads(int num_threads) |
| | { |
| | d->opt.num_threads = num_threads; |
| | } |
| |
|
| | void Extractor::set_blob_allocator(Allocator* allocator) |
| | { |
| | d->opt.blob_allocator = allocator; |
| | } |
| |
|
| | void Extractor::set_workspace_allocator(Allocator* allocator) |
| | { |
| | d->opt.workspace_allocator = allocator; |
| | } |
| |
|
| | #if NCNN_VULKAN |
| | void Extractor::set_vulkan_compute(bool enable) |
| | { |
| | if (d->net->d->opt.use_vulkan_compute) |
| | { |
| | d->opt.use_vulkan_compute = enable; |
| | } |
| | else |
| | { |
| | NCNN_LOGE("set_vulkan_compute failed, network use_vulkan_compute disabled"); |
| | } |
| | } |
| |
|
| | void Extractor::set_blob_vkallocator(VkAllocator* allocator) |
| | { |
| | d->opt.blob_vkallocator = allocator; |
| | } |
| |
|
| | void Extractor::set_workspace_vkallocator(VkAllocator* allocator) |
| | { |
| | d->opt.workspace_vkallocator = allocator; |
| | } |
| |
|
| | void Extractor::set_staging_vkallocator(VkAllocator* allocator) |
| | { |
| | d->opt.staging_vkallocator = allocator; |
| | } |
| | #endif |
| |
|
| | #if NCNN_STRING |
| | int Extractor::input(const char* blob_name, const Mat& in) |
| | { |
| | int blob_index = d->net->find_blob_index_by_name(blob_name); |
| | if (blob_index == -1) |
| | { |
| | NCNN_LOGE("Try"); |
| | const std::vector<const char*>& input_names = d->net->input_names(); |
| | for (size_t i = 0; i < input_names.size(); i++) |
| | { |
| | NCNN_LOGE(" ex.input(\"%s\", in%d);", input_names[i], (int)i); |
| | } |
| |
|
| | return -1; |
| | } |
| |
|
| | return input(blob_index, in); |
| | } |
| |
|
| | int Extractor::extract(const char* blob_name, Mat& feat, int type) |
| | { |
| | int blob_index = d->net->find_blob_index_by_name(blob_name); |
| | if (blob_index == -1) |
| | { |
| | NCNN_LOGE("Try"); |
| | const std::vector<const char*>& output_names = d->net->output_names(); |
| | for (size_t i = 0; i < output_names.size(); i++) |
| | { |
| | NCNN_LOGE(" ex.extract(\"%s\", out%d);", output_names[i], (int)i); |
| | } |
| |
|
| | return -1; |
| | } |
| |
|
| | return extract(blob_index, feat, type); |
| | } |
| | #endif |
| |
|
| | int Extractor::input(int blob_index, const Mat& in) |
| | { |
| | if (blob_index < 0 || blob_index >= (int)d->blob_mats.size()) |
| | return -1; |
| |
|
| | d->blob_mats[blob_index] = in; |
| |
|
| | return 0; |
| | } |
| |
|
| | int Extractor::extract(int blob_index, Mat& feat, int type) |
| | { |
| | if (blob_index < 0 || blob_index >= (int)d->blob_mats.size()) |
| | return -1; |
| |
|
| | int old_blocktime = get_kmp_blocktime(); |
| | set_kmp_blocktime(d->opt.openmp_blocktime); |
| |
|
| | int old_flush_denormals = get_flush_denormals(); |
| | set_flush_denormals(d->opt.flush_denormals); |
| |
|
| | int ret = 0; |
| |
|
| | if (d->blob_mats[blob_index].dims == 0) |
| | { |
| | int layer_index = d->net->blobs()[blob_index].producer; |
| |
|
| | |
| | if (d->opt.use_local_pool_allocator) |
| | { |
| | if (!d->opt.blob_allocator) |
| | { |
| | d->opt.blob_allocator = d->net->d->local_blob_allocator; |
| | } |
| | if (!d->opt.workspace_allocator) |
| | { |
| | d->opt.workspace_allocator = d->net->d->local_workspace_allocator; |
| | } |
| | } |
| |
|
| | #if NCNN_VULKAN |
| | if (d->opt.use_vulkan_compute) |
| | { |
| | |
| | if (!d->opt.blob_vkallocator) |
| | { |
| | d->local_blob_vkallocator = d->net->vulkan_device()->acquire_blob_allocator(); |
| | d->opt.blob_vkallocator = d->local_blob_vkallocator; |
| | } |
| | if (!d->opt.workspace_vkallocator) |
| | { |
| | d->opt.workspace_vkallocator = d->opt.blob_vkallocator; |
| | } |
| | if (!d->opt.staging_vkallocator) |
| | { |
| | d->local_staging_vkallocator = d->net->vulkan_device()->acquire_staging_allocator(); |
| | d->opt.staging_vkallocator = d->local_staging_vkallocator; |
| | } |
| |
|
| | ncnn::VkCompute cmd(d->net->vulkan_device()); |
| | #if NCNN_BENCHMARK |
| | cmd.create_query_pool(d->net->layers().size() * 2); |
| | #endif |
| |
|
| | |
| | if (d->opt.use_image_storage) |
| | { |
| | VkImageMat feat_gpu; |
| | ret = extract(blob_index, feat_gpu, cmd); |
| |
|
| | if (ret == 0 && d->blob_mats[blob_index].dims == 0 && feat_gpu.dims != 0) |
| | { |
| | cmd.record_download(feat_gpu, d->blob_mats[blob_index], d->opt); |
| |
|
| | ret = cmd.submit_and_wait(); |
| |
|
| | #if NCNN_BENCHMARK |
| | std::vector<uint64_t> results(d->net->layers().size() * 2); |
| | cmd.get_query_pool_results(0, d->net->layers().size() * 2, results); |
| | for (size_t i = 0; i < d->net->layers().size(); i++) |
| | { |
| | uint64_t start = results[i * 2]; |
| | uint64_t end = results[i * 2 + 1]; |
| | if (start == 0 || end == 0) |
| | continue; |
| |
|
| | double duration_us = (end - start) * d->net->vulkan_device()->info.timestamp_period() / 1000; |
| | NCNN_LOGE("%-24s %-30s %8.2lfus |", d->net->layers()[i]->type.c_str(), d->net->layers()[i]->name.c_str(), duration_us); |
| | } |
| | #endif |
| | } |
| | } |
| | else |
| | { |
| | VkMat feat_gpu; |
| | ret = extract(blob_index, feat_gpu, cmd); |
| |
|
| | if (ret == 0 && d->blob_mats[blob_index].dims == 0 && feat_gpu.dims != 0) |
| | { |
| | cmd.record_download(feat_gpu, d->blob_mats[blob_index], d->opt); |
| |
|
| | ret = cmd.submit_and_wait(); |
| |
|
| | #if NCNN_BENCHMARK |
| | std::vector<uint64_t> results(d->net->layers().size() * 2); |
| | cmd.get_query_pool_results(0, d->net->layers().size() * 2, results); |
| | for (size_t i = 0; i < d->net->layers().size(); i++) |
| | { |
| | uint64_t start = results[i * 2]; |
| | uint64_t end = results[i * 2 + 1]; |
| | if (start == 0 || end == 0) |
| | continue; |
| |
|
| | double duration_us = (end - start) * d->net->vulkan_device()->info.timestamp_period() / 1000; |
| | NCNN_LOGE("%-24s %-30s %8.2lfus |", d->net->layers()[i]->type.c_str(), d->net->layers()[i]->name.c_str(), duration_us); |
| | } |
| | #endif |
| | } |
| | } |
| | } |
| | else |
| | { |
| | ret = d->net->d->forward_layer(layer_index, d->blob_mats, d->opt); |
| | } |
| | #else |
| | ret = d->net->d->forward_layer(layer_index, d->blob_mats, d->opt); |
| | #endif |
| | } |
| |
|
| | feat = d->blob_mats[blob_index]; |
| |
|
| | if (d->opt.use_packing_layout && (type == 0) && feat.elempack != 1) |
| | { |
| | Mat bottom_blob_unpacked; |
| | convert_packing(feat, bottom_blob_unpacked, 1, d->opt); |
| | feat = bottom_blob_unpacked; |
| | } |
| |
|
| | |
| | |
| | #if NCNN_ARM82 |
| | if (d->opt.use_fp16_storage && cpu_support_arm_asimdhp() && (type == 0)) |
| | { |
| | if (feat.elembits() == 16) |
| | { |
| | Mat feat_fp32; |
| | cast_float16_to_float32(feat, feat_fp32, d->opt); |
| | feat = feat_fp32; |
| | } |
| | } |
| | else |
| | #endif |
| | #if NCNN_BF16 |
| | if (d->opt.use_bf16_storage && (type == 0)) |
| | { |
| | if (feat.elembits() == 16) |
| | { |
| | Mat feat_fp32; |
| | cast_bfloat16_to_float32(feat, feat_fp32, d->opt); |
| | feat = feat_fp32; |
| | } |
| | } |
| | else |
| | #endif |
| | if (feat.elembits() == 8 && (type == 0)) |
| | { |
| | Mat feat_fp32; |
| | cast_int8_to_float32(feat, feat_fp32, d->opt); |
| | feat = feat_fp32; |
| | } |
| | |
| | |
| |
|
| | if (d->opt.use_local_pool_allocator && feat.allocator == d->net->d->local_blob_allocator) |
| | { |
| | |
| | |
| | feat = feat.clone(); |
| | } |
| |
|
| | set_kmp_blocktime(old_blocktime); |
| | set_flush_denormals(old_flush_denormals); |
| |
|
| | return ret; |
| | } |
| |
|
| | #if NCNN_VULKAN |
| | #if NCNN_STRING |
| | int Extractor::input(const char* blob_name, const VkMat& in) |
| | { |
| | int blob_index = d->net->find_blob_index_by_name(blob_name); |
| | if (blob_index == -1) |
| | { |
| | NCNN_LOGE("Try"); |
| | const std::vector<const char*>& input_names = d->net->input_names(); |
| | for (size_t i = 0; i < input_names.size(); i++) |
| | { |
| | NCNN_LOGE(" ex.input(\"%s\", in%d);", input_names[i], (int)i); |
| | } |
| |
|
| | return -1; |
| | } |
| |
|
| | return input(blob_index, in); |
| | } |
| |
|
| | int Extractor::extract(const char* blob_name, VkMat& feat, VkCompute& cmd) |
| | { |
| | int blob_index = d->net->find_blob_index_by_name(blob_name); |
| | if (blob_index == -1) |
| | { |
| | NCNN_LOGE("Try"); |
| | const std::vector<const char*>& output_names = d->net->output_names(); |
| | for (size_t i = 0; i < output_names.size(); i++) |
| | { |
| | NCNN_LOGE(" ex.extract(\"%s\", out%d);", output_names[i], (int)i); |
| | } |
| |
|
| | return -1; |
| | } |
| |
|
| | return extract(blob_index, feat, cmd); |
| | } |
| |
|
| | int Extractor::input(const char* blob_name, const VkImageMat& in) |
| | { |
| | int blob_index = d->net->find_blob_index_by_name(blob_name); |
| | if (blob_index == -1) |
| | { |
| | NCNN_LOGE("Try"); |
| | const std::vector<const char*>& input_names = d->net->input_names(); |
| | for (size_t i = 0; i < input_names.size(); i++) |
| | { |
| | NCNN_LOGE(" ex.input(\"%s\", in%d);", input_names[i], (int)i); |
| | } |
| |
|
| | return -1; |
| | } |
| |
|
| | return input(blob_index, in); |
| | } |
| |
|
| | int Extractor::extract(const char* blob_name, VkImageMat& feat, VkCompute& cmd) |
| | { |
| | int blob_index = d->net->find_blob_index_by_name(blob_name); |
| | if (blob_index == -1) |
| | { |
| | NCNN_LOGE("Try"); |
| | const std::vector<const char*>& output_names = d->net->output_names(); |
| | for (size_t i = 0; i < output_names.size(); i++) |
| | { |
| | NCNN_LOGE(" ex.extract(\"%s\", out%d);", output_names[i], (int)i); |
| | } |
| |
|
| | return -1; |
| | } |
| |
|
| | return extract(blob_index, feat, cmd); |
| | } |
| | #endif |
| |
|
| | int Extractor::input(int blob_index, const VkMat& in) |
| | { |
| | if (blob_index < 0 || blob_index >= (int)d->blob_mats.size()) |
| | return -1; |
| |
|
| | d->blob_mats_gpu[blob_index] = in; |
| |
|
| | return 0; |
| | } |
| |
|
| | int Extractor::extract(int blob_index, VkMat& feat, VkCompute& cmd) |
| | { |
| | if (blob_index < 0 || blob_index >= (int)d->blob_mats.size()) |
| | return -1; |
| |
|
| | int old_blocktime = get_kmp_blocktime(); |
| | set_kmp_blocktime(d->opt.openmp_blocktime); |
| |
|
| | int old_flush_denormals = get_flush_denormals(); |
| | set_flush_denormals(d->opt.flush_denormals); |
| |
|
| | int ret = 0; |
| |
|
| | if (d->blob_mats_gpu[blob_index].dims == 0) |
| | { |
| | if (d->blob_mats_gpu_image[blob_index].dims != 0) |
| | { |
| | |
| | cmd.record_image_to_buffer(d->blob_mats_gpu_image[blob_index], d->blob_mats_gpu[blob_index], d->opt); |
| | } |
| | else if (d->blob_mats[blob_index].dims != 0) |
| | { |
| | |
| | cmd.record_upload(d->blob_mats[blob_index], d->blob_mats_gpu[blob_index], d->opt); |
| | } |
| | else |
| | { |
| | int layer_index = d->net->blobs()[blob_index].producer; |
| | ret = d->net->d->forward_layer(layer_index, d->blob_mats, d->blob_mats_gpu, cmd, d->opt); |
| | } |
| | } |
| |
|
| | feat = d->blob_mats_gpu[blob_index]; |
| |
|
| | set_kmp_blocktime(old_blocktime); |
| | set_flush_denormals(old_flush_denormals); |
| |
|
| | return ret; |
| | } |
| |
|
| | int Extractor::input(int blob_index, const VkImageMat& in) |
| | { |
| | if (blob_index < 0 || blob_index >= (int)d->blob_mats.size()) |
| | return -1; |
| |
|
| | d->blob_mats_gpu_image[blob_index] = in; |
| |
|
| | return 0; |
| | } |
| |
|
| | int Extractor::extract(int blob_index, VkImageMat& feat, VkCompute& cmd) |
| | { |
| | if (blob_index < 0 || blob_index >= (int)d->blob_mats.size()) |
| | return -1; |
| |
|
| | int old_blocktime = get_kmp_blocktime(); |
| | set_kmp_blocktime(d->opt.openmp_blocktime); |
| |
|
| | int old_flush_denormals = get_flush_denormals(); |
| | set_flush_denormals(d->opt.flush_denormals); |
| |
|
| | int ret = 0; |
| |
|
| | if (d->blob_mats_gpu_image[blob_index].dims == 0) |
| | { |
| | if (d->blob_mats_gpu[blob_index].dims != 0) |
| | { |
| | |
| | cmd.record_buffer_to_image(d->blob_mats_gpu[blob_index], d->blob_mats_gpu_image[blob_index], d->opt); |
| | } |
| | else if (d->blob_mats[blob_index].dims != 0) |
| | { |
| | |
| | cmd.record_upload(d->blob_mats[blob_index], d->blob_mats_gpu_image[blob_index], d->opt); |
| | } |
| | else |
| | { |
| | int layer_index = d->net->blobs()[blob_index].producer; |
| | ret = d->net->d->forward_layer(layer_index, d->blob_mats, d->blob_mats_gpu, d->blob_mats_gpu_image, cmd, d->opt); |
| | } |
| | } |
| |
|
| | feat = d->blob_mats_gpu_image[blob_index]; |
| |
|
| | if (feat.empty()) |
| | { |
| | NCNN_LOGE("extract %d image allocation failed", blob_index); |
| | ret = -100; |
| | } |
| |
|
| | set_kmp_blocktime(old_blocktime); |
| | set_flush_denormals(old_flush_denormals); |
| |
|
| | return ret; |
| | } |
| | #endif |
| |
|
| | } |
| |
|