// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_OPTION_H
#define NCNN_OPTION_H
#include "platform.h"
namespace ncnn {
#if NCNN_VULKAN
class VkAllocator;
class PipelineCache;
#endif // NCNN_VULKAN
class Allocator;
// Runtime configuration knobs for ncnn inference.
// Plain data holder: the constructor fills in defaults, and the struct is
// passed around by value/reference to networks, layers and extractors.
// NOTE(review): member declaration order is part of the binary layout —
// do not reorder or remove fields; that is why reserved slots exist below.
class NCNN_EXPORT Option
{
public:
// default option
// sets every field to its documented default (defined out of line)
Option();
public:
// light mode
// intermediate blob will be recycled when enabled
// enabled by default
bool lightmode;
// thread count
// default value is the one returned by get_cpu_count()
int num_threads;
// blob memory allocator
// non-owning pointer; may be null to use the default allocator
Allocator* blob_allocator;
// workspace memory allocator
// non-owning pointer; may be null to use the default allocator
Allocator* workspace_allocator;
#if NCNN_VULKAN
// blob memory allocator (gpu)
VkAllocator* blob_vkallocator;
// workspace memory allocator (gpu)
VkAllocator* workspace_vkallocator;
// staging memory allocator (host-visible upload/download)
VkAllocator* staging_vkallocator;
// pipeline cache
// non-owning pointer; shared compiled-shader cache for gpu pipelines
PipelineCache* pipeline_cache;
#endif // NCNN_VULKAN
// the time openmp threads busy-wait for more work before going to sleep
// default value is 20ms to keep the cores enabled
// without too much extra power consumption afterwards
int openmp_blocktime;
// enable winograd convolution optimization
// improve convolution 3x3 stride1 performance, may consume more memory
// changes should be applied before loading network structure and weight
// enabled by default
bool use_winograd_convolution;
// enable sgemm convolution optimization
// improve convolution 1x1 stride1 performance, may consume more memory
// changes should be applied before loading network structure and weight
// enabled by default
bool use_sgemm_convolution;
// enable quantized int8 inference
// use low-precision int8 path for quantized model
// changes should be applied before loading network structure and weight
// enabled by default
bool use_int8_inference;
// enable vulkan compute
bool use_vulkan_compute;
// enable bf16 data type for storage
// improve most operator performance on all arm devices, may consume more memory
bool use_bf16_storage;
// enable options for gpu inference
// fp16/int8 packed-layout, storage and arithmetic paths
bool use_fp16_packed;
bool use_fp16_storage;
bool use_fp16_arithmetic;
bool use_int8_packed;
bool use_int8_storage;
bool use_int8_arithmetic;
// enable simd-friendly packed memory layout
// improve all operator performance on all arm devices, will consume more memory
// changes should be applied before loading network structure and weight
// enabled by default
bool use_packing_layout;
// enable pack8 gpu shader variants
// presumably only effective together with packed layout — confirm against shader dispatch code
bool use_shader_pack8;
// subgroup option
// toggle use of the corresponding vulkan subgroup feature in shaders
bool use_subgroup_basic;
bool use_subgroup_vote;
bool use_subgroup_ballot;
bool use_subgroup_shuffle;
// turn on for adreno
bool use_image_storage;
// NOTE(review): semantics not visible from this header — verify against gpu backend
bool use_tensor_storage;
// reserved placeholder, keeps struct layout stable across versions
bool use_reserved_0;
// enable DAZ(Denormals-Are-Zero) and FTZ(Flush-To-Zero)
// default value is 3
// 0 = DAZ OFF, FTZ OFF
// 1 = DAZ ON , FTZ OFF
// 2 = DAZ OFF, FTZ ON
// 3 = DAZ ON, FTZ ON
int flush_denormals;
// use the built-in pool allocator when no custom allocator is set
// NOTE(review): exact allocator selection logic lives in the .cpp — confirm there
bool use_local_pool_allocator;
// enable local memory optimization for gpu inference
bool use_shader_local_memory;
// enable cooperative matrix optimization for gpu inference
bool use_cooperative_matrix;
// more fine-grained control of winograd convolution
// effective only when use_winograd_convolution is enabled
bool use_winograd23_convolution;
bool use_winograd43_convolution;
bool use_winograd63_convolution;
// this option is turned on for A53/A55 automatically
// but you can force this on/off if you wish
bool use_a53_a55_optimized_kernel;
// reserved placeholders for future options, keep struct layout stable
bool use_reserved_7;
bool use_reserved_8;
bool use_reserved_9;
bool use_reserved_10;
bool use_reserved_11;
};
} // namespace ncnn
#endif // NCNN_OPTION_H