| // Tencent is pleased to support the open source community by making ncnn available. | |
| // | |
| // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. | |
| // | |
| // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | |
| // in compliance with the License. You may obtain a copy of the License at | |
| // | |
| // https://opensource.org/licenses/BSD-3-Clause | |
| // | |
| // Unless required by applicable law or agreed to in writing, software distributed | |
| // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | |
| // specific language governing permissions and limitations under the License. | |
| namespace ncnn { | |
| class VkAllocator; | |
| class PipelineCache; | |
| class Allocator; | |
| class NCNN_EXPORT Option | |
| { | |
| public: | |
| // Default constructor. Option is a plain set of public settings; the defaults noted on the fields below are presumably applied here (definition not visible in this header). | |
| Option(); | |
| public: | |
| // Light mode. | |
| // When enabled, intermediate blobs are recycled. | |
| // Enabled by default. | |
| bool lightmode; | |
| // Thread count. | |
| // Defaults to the value returned by get_cpu_count(). | |
| int num_threads; | |
| // Blob memory allocator. NOTE(review): ownership/lifetime of the pointer members in this class is not stated in this header - confirm against the implementation. | |
| Allocator* blob_allocator; | |
| // Workspace memory allocator. | |
| Allocator* workspace_allocator; | |
| // Blob memory allocator of type VkAllocator (separate from blob_allocator above). | |
| VkAllocator* blob_vkallocator; | |
| // Workspace memory allocator of type VkAllocator (separate from workspace_allocator above). | |
| VkAllocator* workspace_vkallocator; | |
| // Staging memory allocator (VkAllocator). | |
| VkAllocator* staging_vkallocator; | |
| // Pipeline cache. | |
| PipelineCache* pipeline_cache; | |
| // How long OpenMP threads busy-wait for more work before going to sleep. | |
| // Defaults to 20 ms, which keeps the cores enabled | |
| // without too much extra power consumption afterwards. | |
| int openmp_blocktime; | |
| // Enable the Winograd convolution optimization. | |
| // Improves 3x3 stride-1 convolution performance; may consume more memory. | |
| // Changes should be applied before loading the network structure and weights. | |
| // Enabled by default. | |
| bool use_winograd_convolution; | |
| // Enable the sgemm convolution optimization. | |
| // Improves 1x1 stride-1 convolution performance; may consume more memory. | |
| // Changes should be applied before loading the network structure and weights. | |
| // Enabled by default. | |
| bool use_sgemm_convolution; | |
| // Enable quantized int8 inference. | |
| // Uses the low-precision int8 path for quantized models. | |
| // Changes should be applied before loading the network structure and weights. | |
| // Enabled by default. | |
| bool use_int8_inference; | |
| // Enable Vulkan compute. | |
| bool use_vulkan_compute; | |
| // Enable the bf16 data type for storage. | |
| // Improves the performance of most operators on all ARM devices; may consume more memory. | |
| bool use_bf16_storage; | |
| // Options for GPU inference: the six fp16/int8 packed, storage and arithmetic flags below. | |
| bool use_fp16_packed; | |
| bool use_fp16_storage; | |
| bool use_fp16_arithmetic; | |
| bool use_int8_packed; | |
| bool use_int8_storage; | |
| bool use_int8_arithmetic; | |
| // Enable the SIMD-friendly packed memory layout. | |
| // Improves the performance of all operators on all ARM devices; will consume more memory. | |
| // Changes should be applied before loading the network structure and weights. | |
| // Enabled by default. | |
| bool use_packing_layout; | |
| bool use_shader_pack8; | |
| // Subgroup options (basic, vote, ballot, shuffle). | |
| bool use_subgroup_basic; | |
| bool use_subgroup_vote; | |
| bool use_subgroup_ballot; | |
| bool use_subgroup_shuffle; | |
| // Turn on for Adreno. NOTE(review): unclear whether this remark also covers use_tensor_storage below - confirm. | |
| bool use_image_storage; | |
| bool use_tensor_storage; | |
| bool use_reserved_0; | |
| // Enable DAZ (Denormals-Are-Zero) and FTZ (Flush-To-Zero). | |
| // Default value is 3. | |
| // 0 = DAZ OFF, FTZ OFF | |
| // 1 = DAZ ON,  FTZ OFF | |
| // 2 = DAZ OFF, FTZ ON | |
| // 3 = DAZ ON,  FTZ ON | |
| int flush_denormals; | |
| bool use_local_pool_allocator; | |
| // Enable the local memory optimization for GPU inference. | |
| bool use_shader_local_memory; | |
| // Enable the cooperative matrix optimization for GPU inference. | |
| bool use_cooperative_matrix; | |
| // Finer-grained control of Winograd convolution (the 23 / 43 / 63 flags below). | |
| bool use_winograd23_convolution; | |
| bool use_winograd43_convolution; | |
| bool use_winograd63_convolution; | |
| // Turned on automatically for A53/A55, | |
| // but it can be forced on or off if desired. | |
| bool use_a53_a55_optimized_kernel; | |
| bool use_reserved_7; | |
| bool use_reserved_8; | |
| bool use_reserved_9; | |
| bool use_reserved_10; | |
| bool use_reserved_11; | |
| }; | |
| } // namespace ncnn | |