| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | #ifdef _MSC_VER |
| | #define _CRT_SECURE_NO_DEPRECATE |
| | #endif |
| |
|
| | #include <float.h> |
| | #include <limits.h> |
| | #include <math.h> |
| | #include <stdio.h> |
| | #include <stdint.h> |
| | #include <stdlib.h> |
| | #include <string.h> |
| |
|
| | #if defined(USE_NCNN_SIMPLEOCV) |
| | #include "simpleocv.h" |
| | #elif defined(USE_LOCAL_IMREADWRITE) |
| | #include "imreadwrite.h" |
| | #else |
| | #include <opencv2/core/core.hpp> |
| | #include <opencv2/highgui/highgui.hpp> |
| | #endif |
| | #include <string> |
| | #include <vector> |
| |
|
| | |
| | #include "benchmark.h" |
| | #include "cpu.h" |
| | #include "net.h" |
| |
|
| | |
| | #include "layer/convolution.h" |
| | #include "layer/convolutiondepthwise.h" |
| | #include "layer/innerproduct.h" |
| |
|
// Per-blob activation statistics collected while calibrating.
class QuantBlobStat
{
public:
    // All scalar statistics start at zero; both histograms start empty.
    QuantBlobStat()
        : threshold(0.f), absmax(0.f), total(0)
    {
    }

public:
    float threshold; // clipping threshold; the activation scale is computed as 127 / threshold
    float absmax;    // largest absolute activation value seen over the calibration images

    // number of elements in the blob; written by quantize_ACIQ
    int total;

    // raw and normalised value histograms; filled by quantize_KL
    std::vector<uint64_t> histogram;
    std::vector<float> histogram_normed;
};
| |
|
| | class QuantNet : public ncnn::Net |
| | { |
| | public: |
| | QuantNet(); |
| |
|
| | std::vector<ncnn::Blob>& blobs; |
| | std::vector<ncnn::Layer*>& layers; |
| |
|
| | public: |
| | std::vector<std::vector<std::string> > listspaths; |
| | std::vector<std::vector<float> > means; |
| | std::vector<std::vector<float> > norms; |
| | std::vector<std::vector<int> > shapes; |
| | std::vector<int> type_to_pixels; |
| | int quantize_num_threads; |
| |
|
| | public: |
| | int init(); |
| | void print_quant_info() const; |
| | int save_table(const char* tablepath); |
| | int quantize_KL(); |
| | int quantize_ACIQ(); |
| | int quantize_EQ(); |
| |
|
| | public: |
| | std::vector<int> input_blobs; |
| | std::vector<int> conv_layers; |
| | std::vector<int> conv_bottom_blobs; |
| | std::vector<int> conv_top_blobs; |
| |
|
| | |
| | std::vector<QuantBlobStat> quant_blob_stats; |
| | std::vector<ncnn::Mat> weight_scales; |
| | std::vector<ncnn::Mat> bottom_blob_scales; |
| | }; |
| |
|
| | QuantNet::QuantNet() |
| | : blobs(mutable_blobs()), layers(mutable_layers()) |
| | { |
| | quantize_num_threads = ncnn::get_cpu_count(); |
| | } |
| |
|
| | int QuantNet::init() |
| | { |
| | |
| | for (int i = 0; i < (int)layers.size(); i++) |
| | { |
| | const ncnn::Layer* layer = layers[i]; |
| | if (layer->type == "Input") |
| | { |
| | input_blobs.push_back(layer->tops[0]); |
| | } |
| | } |
| |
|
| | |
| | for (int i = 0; i < (int)layers.size(); i++) |
| | { |
| | const ncnn::Layer* layer = layers[i]; |
| | if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise" || layer->type == "InnerProduct") |
| | { |
| | conv_layers.push_back(i); |
| | conv_bottom_blobs.push_back(layer->bottoms[0]); |
| | conv_top_blobs.push_back(layer->tops[0]); |
| | } |
| | } |
| |
|
| | const int conv_layer_count = (int)conv_layers.size(); |
| | const int conv_bottom_blob_count = (int)conv_bottom_blobs.size(); |
| |
|
| | quant_blob_stats.resize(conv_bottom_blob_count); |
| | weight_scales.resize(conv_layer_count); |
| | bottom_blob_scales.resize(conv_bottom_blob_count); |
| |
|
| | return 0; |
| | } |
| |
|
| | int QuantNet::save_table(const char* tablepath) |
| | { |
| | FILE* fp = fopen(tablepath, "wb"); |
| | if (!fp) |
| | { |
| | fprintf(stderr, "fopen %s failed\n", tablepath); |
| | return -1; |
| | } |
| |
|
| | const int conv_layer_count = (int)conv_layers.size(); |
| | const int conv_bottom_blob_count = (int)conv_bottom_blobs.size(); |
| |
|
| | for (int i = 0; i < conv_layer_count; i++) |
| | { |
| | const ncnn::Mat& weight_scale = weight_scales[i]; |
| |
|
| | fprintf(fp, "%s_param_0 ", layers[conv_layers[i]]->name.c_str()); |
| | for (int j = 0; j < weight_scale.w; j++) |
| | { |
| | fprintf(fp, "%f ", weight_scale[j]); |
| | } |
| | fprintf(fp, "\n"); |
| | } |
| |
|
| | for (int i = 0; i < conv_bottom_blob_count; i++) |
| | { |
| | const ncnn::Mat& bottom_blob_scale = bottom_blob_scales[i]; |
| |
|
| | fprintf(fp, "%s ", layers[conv_layers[i]]->name.c_str()); |
| | for (int j = 0; j < bottom_blob_scale.w; j++) |
| | { |
| | fprintf(fp, "%f ", bottom_blob_scale[j]); |
| | } |
| | fprintf(fp, "\n"); |
| | } |
| |
|
| | fclose(fp); |
| |
|
| | fprintf(stderr, "ncnn int8 calibration table create success, best wish for your int8 inference has a low accuracy loss...\\(^0^)/...233...\n"); |
| |
|
| | return 0; |
| | } |
| |
|
| | void QuantNet::print_quant_info() const |
| | { |
| | for (int i = 0; i < (int)conv_bottom_blobs.size(); i++) |
| | { |
| | const QuantBlobStat& stat = quant_blob_stats[i]; |
| |
|
| | float scale = 127 / stat.threshold; |
| |
|
| | fprintf(stderr, "%-40s : max = %-15f threshold = %-15f scale = %-15f\n", layers[conv_layers[i]]->name.c_str(), stat.absmax, stat.threshold, scale); |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | inline ncnn::Mat read_and_resize_image(const std::vector<int>& shape, const std::string& imagepath, int pixel_convert_type) |
| | { |
| | int target_w = shape[0]; |
| | int target_h = shape[1]; |
| | cv::Mat bgr = cv::imread(imagepath, 1); |
| | if (target_h <= 0 && target_w <= 0) |
| | { |
| | return ncnn::Mat::from_pixels(bgr.data, pixel_convert_type, bgr.cols, bgr.rows); |
| | } |
| | if (target_h <= 0 || target_w <= 0) |
| | { |
| | float scale = 1.0; |
| | if (target_h <= 0) |
| | { |
| | scale = 1.0 * bgr.cols / target_w; |
| | target_h = int(1.0 * bgr.rows / scale); |
| | } |
| | if (target_w <= 0) |
| | { |
| | scale = 1.0 * bgr.rows / target_h; |
| | target_w = int(1.0 * bgr.cols / scale); |
| | } |
| | } |
| | return ncnn::Mat::from_pixels_resize(bgr.data, pixel_convert_type, bgr.cols, bgr.rows, target_w, target_h); |
| | } |
| |
|
// Kullback-Leibler divergence sum(a[i] * log(a[i] / b[i])) of distribution a
// relative to distribution b.
//
// Only the bins present in both inputs are visited, and bins where a[i] is
// zero contribute nothing (the limit of x * log(x) for x -> 0) rather than
// poisoning the sum with 0 * -inf = NaN.
static float compute_kl_divergence(const std::vector<float>& a, const std::vector<float>& b)
{
    // never index past the shorter input
    const size_t length = a.size() < b.size() ? a.size() : b.size();

    float result = 0;
    for (size_t i = 0; i < length; i++)
    {
        if (a[i] == 0.f)
            continue;

        result += a[i] * log(a[i] / b[i]);
    }

    return result;
}
| |
|
| | int QuantNet::quantize_KL() |
| | { |
| | const int input_blob_count = (int)input_blobs.size(); |
| | const int conv_layer_count = (int)conv_layers.size(); |
| | const int conv_bottom_blob_count = (int)conv_bottom_blobs.size(); |
| | const int image_count = (int)listspaths[0].size(); |
| |
|
| | const int num_histogram_bins = 2048; |
| |
|
| | std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads); |
| | std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads); |
| |
|
| | |
| | #pragma omp parallel for num_threads(quantize_num_threads) |
| | for (int i = 0; i < conv_layer_count; i++) |
| | { |
| | const ncnn::Layer* layer = layers[conv_layers[i]]; |
| |
|
| | if (layer->type == "Convolution") |
| | { |
| | const ncnn::Convolution* convolution = (const ncnn::Convolution*)layer; |
| |
|
| | const int num_output = convolution->num_output; |
| | const int kernel_w = convolution->kernel_w; |
| | const int kernel_h = convolution->kernel_h; |
| | const int dilation_w = convolution->dilation_w; |
| | const int dilation_h = convolution->dilation_h; |
| | const int stride_w = convolution->stride_w; |
| | const int stride_h = convolution->stride_h; |
| |
|
| | const int weight_data_size_output = convolution->weight_data_size / num_output; |
| |
|
| | |
| | |
| | bool quant_6bit = false; |
| | if (kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1) |
| | quant_6bit = true; |
| |
|
| | weight_scales[i].create(num_output); |
| |
|
| | for (int n = 0; n < num_output; n++) |
| | { |
| | const ncnn::Mat weight_data_n = convolution->weight_data.range(weight_data_size_output * n, weight_data_size_output); |
| |
|
| | float absmax = 0.f; |
| | for (int k = 0; k < weight_data_size_output; k++) |
| | { |
| | absmax = std::max(absmax, (float)fabs(weight_data_n[k])); |
| | } |
| |
|
| | if (quant_6bit) |
| | { |
| | weight_scales[i][n] = 31 / absmax; |
| | } |
| | else |
| | { |
| | weight_scales[i][n] = 127 / absmax; |
| | } |
| | } |
| | } |
| |
|
| | if (layer->type == "ConvolutionDepthWise") |
| | { |
| | const ncnn::ConvolutionDepthWise* convolutiondepthwise = (const ncnn::ConvolutionDepthWise*)layer; |
| |
|
| | const int group = convolutiondepthwise->group; |
| | const int weight_data_size_output = convolutiondepthwise->weight_data_size / group; |
| |
|
| | std::vector<float> scales; |
| |
|
| | weight_scales[i].create(group); |
| |
|
| | for (int n = 0; n < group; n++) |
| | { |
| | const ncnn::Mat weight_data_n = convolutiondepthwise->weight_data.range(weight_data_size_output * n, weight_data_size_output); |
| |
|
| | float absmax = 0.f; |
| | for (int k = 0; k < weight_data_size_output; k++) |
| | { |
| | absmax = std::max(absmax, (float)fabs(weight_data_n[k])); |
| | } |
| |
|
| | weight_scales[i][n] = 127 / absmax; |
| | } |
| | } |
| |
|
| | if (layer->type == "InnerProduct") |
| | { |
| | const ncnn::InnerProduct* innerproduct = (const ncnn::InnerProduct*)layer; |
| |
|
| | const int num_output = innerproduct->num_output; |
| | const int weight_data_size_output = innerproduct->weight_data_size / num_output; |
| |
|
| | weight_scales[i].create(num_output); |
| |
|
| | for (int n = 0; n < num_output; n++) |
| | { |
| | const ncnn::Mat weight_data_n = innerproduct->weight_data.range(weight_data_size_output * n, weight_data_size_output); |
| |
|
| | float absmax = 0.f; |
| | for (int k = 0; k < weight_data_size_output; k++) |
| | { |
| | absmax = std::max(absmax, (float)fabs(weight_data_n[k])); |
| | } |
| |
|
| | weight_scales[i][n] = 127 / absmax; |
| | } |
| | } |
| | } |
| |
|
| | |
| | #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1) |
| | for (int i = 0; i < image_count; i++) |
| | { |
| | if (i % 100 == 0) |
| | { |
| | fprintf(stderr, "count the absmax %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count); |
| | } |
| |
|
| | ncnn::Extractor ex = create_extractor(); |
| | ex.set_light_mode(true); |
| |
|
| | const int thread_num = ncnn::get_omp_thread_num(); |
| | ex.set_blob_allocator(&blob_allocators[thread_num]); |
| | ex.set_workspace_allocator(&workspace_allocators[thread_num]); |
| |
|
| | for (int j = 0; j < input_blob_count; j++) |
| | { |
| | const int type_to_pixel = type_to_pixels[j]; |
| | const std::vector<float>& mean_vals = means[j]; |
| | const std::vector<float>& norm_vals = norms[j]; |
| |
|
| | int pixel_convert_type = ncnn::Mat::PIXEL_BGR; |
| | if (type_to_pixel != pixel_convert_type) |
| | { |
| | pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT); |
| | } |
| |
|
| | ncnn::Mat in = read_and_resize_image(shapes[j], listspaths[j][i], pixel_convert_type); |
| |
|
| | in.substract_mean_normalize(mean_vals.data(), norm_vals.data()); |
| |
|
| | ex.input(input_blobs[j], in); |
| | } |
| |
|
| | for (int j = 0; j < conv_bottom_blob_count; j++) |
| | { |
| | ncnn::Mat out; |
| | ex.extract(conv_bottom_blobs[j], out); |
| |
|
| | |
| | { |
| | float absmax = 0.f; |
| |
|
| | const int outc = out.c; |
| | const int outsize = out.w * out.h; |
| | for (int p = 0; p < outc; p++) |
| | { |
| | const float* ptr = out.channel(p); |
| | for (int k = 0; k < outsize; k++) |
| | { |
| | absmax = std::max(absmax, (float)fabs(ptr[k])); |
| | } |
| | } |
| |
|
| | #pragma omp critical |
| | { |
| | QuantBlobStat& stat = quant_blob_stats[j]; |
| | stat.absmax = std::max(stat.absmax, absmax); |
| | } |
| | } |
| | } |
| | } |
| |
|
| | |
| | #pragma omp parallel for num_threads(quantize_num_threads) |
| | for (int i = 0; i < conv_bottom_blob_count; i++) |
| | { |
| | QuantBlobStat& stat = quant_blob_stats[i]; |
| |
|
| | stat.histogram.resize(num_histogram_bins, 0); |
| | stat.histogram_normed.resize(num_histogram_bins, 0); |
| | } |
| |
|
| | |
| | #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1) |
| | for (int i = 0; i < image_count; i++) |
| | { |
| | if (i % 100 == 0) |
| | { |
| | fprintf(stderr, "build histogram %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count); |
| | } |
| |
|
| | ncnn::Extractor ex = create_extractor(); |
| | ex.set_light_mode(true); |
| |
|
| | const int thread_num = ncnn::get_omp_thread_num(); |
| | ex.set_blob_allocator(&blob_allocators[thread_num]); |
| | ex.set_workspace_allocator(&workspace_allocators[thread_num]); |
| |
|
| | for (int j = 0; j < input_blob_count; j++) |
| | { |
| | const int type_to_pixel = type_to_pixels[j]; |
| | const std::vector<float>& mean_vals = means[j]; |
| | const std::vector<float>& norm_vals = norms[j]; |
| |
|
| | int pixel_convert_type = ncnn::Mat::PIXEL_BGR; |
| | if (type_to_pixel != pixel_convert_type) |
| | { |
| | pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT); |
| | } |
| |
|
| | ncnn::Mat in = read_and_resize_image(shapes[j], listspaths[j][i], pixel_convert_type); |
| |
|
| | in.substract_mean_normalize(mean_vals.data(), norm_vals.data()); |
| |
|
| | ex.input(input_blobs[j], in); |
| | } |
| |
|
| | for (int j = 0; j < conv_bottom_blob_count; j++) |
| | { |
| | ncnn::Mat out; |
| | ex.extract(conv_bottom_blobs[j], out); |
| |
|
| | |
| | { |
| | const float absmax = quant_blob_stats[j].absmax; |
| |
|
| | std::vector<uint64_t> histogram(num_histogram_bins, 0); |
| |
|
| | const int outc = out.c; |
| | const int outsize = out.w * out.h; |
| | for (int p = 0; p < outc; p++) |
| | { |
| | const float* ptr = out.channel(p); |
| | for (int k = 0; k < outsize; k++) |
| | { |
| | if (ptr[k] == 0.f) |
| | continue; |
| |
|
| | const int index = std::min((int)(fabs(ptr[k]) / absmax * num_histogram_bins), (num_histogram_bins - 1)); |
| |
|
| | histogram[index] += 1; |
| | } |
| | } |
| |
|
| | #pragma omp critical |
| | { |
| | QuantBlobStat& stat = quant_blob_stats[j]; |
| |
|
| | for (int k = 0; k < num_histogram_bins; k++) |
| | { |
| | stat.histogram[k] += histogram[k]; |
| | } |
| | } |
| | } |
| | } |
| | } |
| |
|
| | |
| | #pragma omp parallel for num_threads(quantize_num_threads) |
| | for (int i = 0; i < conv_bottom_blob_count; i++) |
| | { |
| | QuantBlobStat& stat = quant_blob_stats[i]; |
| |
|
| | |
| | { |
| | uint64_t sum = 0; |
| | for (int j = 0; j < num_histogram_bins; j++) |
| | { |
| | sum += stat.histogram[j]; |
| | } |
| |
|
| | for (int j = 0; j < num_histogram_bins; j++) |
| | { |
| | stat.histogram_normed[j] = (float)(stat.histogram[j] / (double)sum); |
| | } |
| | } |
| |
|
| | const int target_bin = 128; |
| |
|
| | int target_threshold = target_bin; |
| | float min_kl_divergence = FLT_MAX; |
| |
|
| | for (int threshold = target_bin; threshold < num_histogram_bins; threshold++) |
| | { |
| | const float kl_eps = 0.0001f; |
| |
|
| | std::vector<float> clip_distribution(threshold, kl_eps); |
| | { |
| | for (int j = 0; j < threshold; j++) |
| | { |
| | clip_distribution[j] += stat.histogram_normed[j]; |
| | } |
| | for (int j = threshold; j < num_histogram_bins; j++) |
| | { |
| | clip_distribution[threshold - 1] += stat.histogram_normed[j]; |
| | } |
| | } |
| |
|
| | const float num_per_bin = (float)threshold / target_bin; |
| |
|
| | std::vector<float> quantize_distribution(target_bin, 0.f); |
| | { |
| | { |
| | const float end = num_per_bin; |
| |
|
| | const int right_lower = (int)floor(end); |
| | const float right_scale = end - right_lower; |
| |
|
| | if (right_scale > 0) |
| | { |
| | quantize_distribution[0] += right_scale * stat.histogram_normed[right_lower]; |
| | } |
| |
|
| | for (int k = 0; k < right_lower; k++) |
| | { |
| | quantize_distribution[0] += stat.histogram_normed[k]; |
| | } |
| |
|
| | quantize_distribution[0] /= right_lower + right_scale; |
| | } |
| | for (int j = 1; j < target_bin - 1; j++) |
| | { |
| | const float start = j * num_per_bin; |
| | const float end = (j + 1) * num_per_bin; |
| |
|
| | const int left_upper = (int)ceil(start); |
| | const float left_scale = left_upper - start; |
| |
|
| | const int right_lower = (int)floor(end); |
| | const float right_scale = end - right_lower; |
| |
|
| | if (left_scale > 0) |
| | { |
| | quantize_distribution[j] += left_scale * stat.histogram_normed[left_upper - 1]; |
| | } |
| |
|
| | if (right_scale > 0) |
| | { |
| | quantize_distribution[j] += right_scale * stat.histogram_normed[right_lower]; |
| | } |
| |
|
| | for (int k = left_upper; k < right_lower; k++) |
| | { |
| | quantize_distribution[j] += stat.histogram_normed[k]; |
| | } |
| |
|
| | quantize_distribution[j] /= right_lower - left_upper + left_scale + right_scale; |
| | } |
| | { |
| | const float start = threshold - num_per_bin; |
| |
|
| | const int left_upper = (int)ceil(start); |
| | const float left_scale = left_upper - start; |
| |
|
| | if (left_scale > 0) |
| | { |
| | quantize_distribution[target_bin - 1] += left_scale * stat.histogram_normed[left_upper - 1]; |
| | } |
| |
|
| | for (int k = left_upper; k < threshold; k++) |
| | { |
| | quantize_distribution[target_bin - 1] += stat.histogram_normed[k]; |
| | } |
| |
|
| | quantize_distribution[target_bin - 1] /= threshold - left_upper + left_scale; |
| | } |
| | } |
| |
|
| | std::vector<float> expand_distribution(threshold, kl_eps); |
| | { |
| | { |
| | const float end = num_per_bin; |
| |
|
| | const int right_lower = (int)floor(end); |
| | const float right_scale = end - right_lower; |
| |
|
| | if (right_scale > 0) |
| | { |
| | expand_distribution[right_lower] += right_scale * quantize_distribution[0]; |
| | } |
| |
|
| | for (int k = 0; k < right_lower; k++) |
| | { |
| | expand_distribution[k] += quantize_distribution[0]; |
| | } |
| | } |
| | for (int j = 1; j < target_bin - 1; j++) |
| | { |
| | const float start = j * num_per_bin; |
| | const float end = (j + 1) * num_per_bin; |
| |
|
| | const int left_upper = (int)ceil(start); |
| | const float left_scale = left_upper - start; |
| |
|
| | const int right_lower = (int)floor(end); |
| | const float right_scale = end - right_lower; |
| |
|
| | if (left_scale > 0) |
| | { |
| | expand_distribution[left_upper - 1] += left_scale * quantize_distribution[j]; |
| | } |
| |
|
| | if (right_scale > 0) |
| | { |
| | expand_distribution[right_lower] += right_scale * quantize_distribution[j]; |
| | } |
| |
|
| | for (int k = left_upper; k < right_lower; k++) |
| | { |
| | expand_distribution[k] += quantize_distribution[j]; |
| | } |
| | } |
| | { |
| | const float start = threshold - num_per_bin; |
| |
|
| | const int left_upper = (int)ceil(start); |
| | const float left_scale = left_upper - start; |
| |
|
| | if (left_scale > 0) |
| | { |
| | expand_distribution[left_upper - 1] += left_scale * quantize_distribution[target_bin - 1]; |
| | } |
| |
|
| | for (int k = left_upper; k < threshold; k++) |
| | { |
| | expand_distribution[k] += quantize_distribution[target_bin - 1]; |
| | } |
| | } |
| | } |
| |
|
| | |
| | const float kl_divergence = compute_kl_divergence(clip_distribution, expand_distribution); |
| |
|
| | |
| | if (kl_divergence < min_kl_divergence) |
| | { |
| | min_kl_divergence = kl_divergence; |
| | target_threshold = threshold; |
| | } |
| | } |
| |
|
| | stat.threshold = (target_threshold + 0.5f) * stat.absmax / num_histogram_bins; |
| | float scale = 127 / stat.threshold; |
| |
|
| | bottom_blob_scales[i].create(1); |
| | bottom_blob_scales[i][0] = scale; |
| | } |
| |
|
| | return 0; |
| | } |
| |
|
// ACIQ clipping threshold for N values with the given absmax, modelled as
// Gaussian, when quantizing to num_bits bits (1..8).
//
// The standard deviation is estimated from absmax and N, then multiplied by
// the per-bit-width factor from the table below.
//
// When the estimate is not defined - fewer than two samples, where
// sqrt(2 * log(N)) is zero or NaN, or a bit width without a table entry -
// absmax is returned unchanged, i.e. no clipping is applied.
static float compute_aciq_gaussian_clip(float absmax, int N, int num_bits = 8)
{
    // clipping factor in units of the standard deviation, indexed by num_bits - 1
    static const float alpha_gaussian[8] = {0, 1.71063519, 2.15159277, 2.55913646, 2.93620062, 3.28691474, 3.6151146, 3.92403714};

    if (N < 2 || num_bits < 1 || num_bits > 8)
        return absmax;

    const double gaussian_const = (0.5 * 0.35) * (1 + sqrt(3.14159265358979323846 * log(4)));

    const double sigma = (absmax * 2 * gaussian_const) / sqrt(2 * log(N));

    return (float)(alpha_gaussian[num_bits - 1] * sigma);
}
| |
|
| | int QuantNet::quantize_ACIQ() |
| | { |
| | const int input_blob_count = (int)input_blobs.size(); |
| | const int conv_layer_count = (int)conv_layers.size(); |
| | const int conv_bottom_blob_count = (int)conv_bottom_blobs.size(); |
| | const int image_count = (int)listspaths[0].size(); |
| |
|
| | std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads); |
| | std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads); |
| |
|
| | |
| | #pragma omp parallel for num_threads(quantize_num_threads) |
| | for (int i = 0; i < conv_layer_count; i++) |
| | { |
| | const ncnn::Layer* layer = layers[conv_layers[i]]; |
| |
|
| | if (layer->type == "Convolution") |
| | { |
| | const ncnn::Convolution* convolution = (const ncnn::Convolution*)layer; |
| |
|
| | const int num_output = convolution->num_output; |
| | const int kernel_w = convolution->kernel_w; |
| | const int kernel_h = convolution->kernel_h; |
| | const int dilation_w = convolution->dilation_w; |
| | const int dilation_h = convolution->dilation_h; |
| | const int stride_w = convolution->stride_w; |
| | const int stride_h = convolution->stride_h; |
| |
|
| | const int weight_data_size_output = convolution->weight_data_size / num_output; |
| |
|
| | |
| | |
| | bool quant_6bit = false; |
| | if (kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1) |
| | quant_6bit = true; |
| |
|
| | weight_scales[i].create(num_output); |
| |
|
| | for (int n = 0; n < num_output; n++) |
| | { |
| | const ncnn::Mat weight_data_n = convolution->weight_data.range(weight_data_size_output * n, weight_data_size_output); |
| |
|
| | float absmax = 0.f; |
| | for (int k = 0; k < weight_data_size_output; k++) |
| | { |
| | absmax = std::max(absmax, (float)fabs(weight_data_n[k])); |
| | } |
| |
|
| | if (quant_6bit) |
| | { |
| | const float threshold = compute_aciq_gaussian_clip(absmax, weight_data_size_output, 6); |
| | weight_scales[i][n] = 31 / threshold; |
| | } |
| | else |
| | { |
| | const float threshold = compute_aciq_gaussian_clip(absmax, weight_data_size_output); |
| | weight_scales[i][n] = 127 / threshold; |
| | } |
| | } |
| | } |
| |
|
| | if (layer->type == "ConvolutionDepthWise") |
| | { |
| | const ncnn::ConvolutionDepthWise* convolutiondepthwise = (const ncnn::ConvolutionDepthWise*)layer; |
| |
|
| | const int group = convolutiondepthwise->group; |
| | const int weight_data_size_output = convolutiondepthwise->weight_data_size / group; |
| |
|
| | std::vector<float> scales; |
| |
|
| | weight_scales[i].create(group); |
| |
|
| | for (int n = 0; n < group; n++) |
| | { |
| | const ncnn::Mat weight_data_n = convolutiondepthwise->weight_data.range(weight_data_size_output * n, weight_data_size_output); |
| |
|
| | float absmax = 0.f; |
| | for (int k = 0; k < weight_data_size_output; k++) |
| | { |
| | absmax = std::max(absmax, (float)fabs(weight_data_n[k])); |
| | } |
| |
|
| | const float threshold = compute_aciq_gaussian_clip(absmax, weight_data_size_output); |
| | weight_scales[i][n] = 127 / threshold; |
| | } |
| | } |
| |
|
| | if (layer->type == "InnerProduct") |
| | { |
| | const ncnn::InnerProduct* innerproduct = (const ncnn::InnerProduct*)layer; |
| |
|
| | const int num_output = innerproduct->num_output; |
| | const int weight_data_size_output = innerproduct->weight_data_size / num_output; |
| |
|
| | weight_scales[i].create(num_output); |
| |
|
| | for (int n = 0; n < num_output; n++) |
| | { |
| | const ncnn::Mat weight_data_n = innerproduct->weight_data.range(weight_data_size_output * n, weight_data_size_output); |
| |
|
| | float absmax = 0.f; |
| | for (int k = 0; k < weight_data_size_output; k++) |
| | { |
| | absmax = std::max(absmax, (float)fabs(weight_data_n[k])); |
| | } |
| |
|
| | const float threshold = compute_aciq_gaussian_clip(absmax, weight_data_size_output); |
| | weight_scales[i][n] = 127 / threshold; |
| | } |
| | } |
| | } |
| |
|
| | |
| | #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1) |
| | for (int i = 0; i < image_count; i++) |
| | { |
| | if (i % 100 == 0) |
| | { |
| | fprintf(stderr, "count the absmax %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count); |
| | } |
| |
|
| | ncnn::Extractor ex = create_extractor(); |
| | ex.set_light_mode(true); |
| |
|
| | const int thread_num = ncnn::get_omp_thread_num(); |
| | ex.set_blob_allocator(&blob_allocators[thread_num]); |
| | ex.set_workspace_allocator(&workspace_allocators[thread_num]); |
| |
|
| | for (int j = 0; j < input_blob_count; j++) |
| | { |
| | const int type_to_pixel = type_to_pixels[j]; |
| | const std::vector<float>& mean_vals = means[j]; |
| | const std::vector<float>& norm_vals = norms[j]; |
| |
|
| | int pixel_convert_type = ncnn::Mat::PIXEL_BGR; |
| | if (type_to_pixel != pixel_convert_type) |
| | { |
| | pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT); |
| | } |
| |
|
| | ncnn::Mat in = read_and_resize_image(shapes[j], listspaths[j][i], pixel_convert_type); |
| |
|
| | in.substract_mean_normalize(mean_vals.data(), norm_vals.data()); |
| |
|
| | ex.input(input_blobs[j], in); |
| | } |
| |
|
| | for (int j = 0; j < conv_bottom_blob_count; j++) |
| | { |
| | ncnn::Mat out; |
| | ex.extract(conv_bottom_blobs[j], out); |
| |
|
| | |
| | { |
| | float absmax = 0.f; |
| |
|
| | const int outc = out.c; |
| | const int outsize = out.w * out.h; |
| | for (int p = 0; p < outc; p++) |
| | { |
| | const float* ptr = out.channel(p); |
| | for (int k = 0; k < outsize; k++) |
| | { |
| | absmax = std::max(absmax, (float)fabs(ptr[k])); |
| | } |
| | } |
| |
|
| | #pragma omp critical |
| | { |
| | QuantBlobStat& stat = quant_blob_stats[j]; |
| | stat.absmax = std::max(stat.absmax, absmax); |
| | stat.total = outc * outsize; |
| | } |
| | } |
| | } |
| | } |
| |
|
| | |
| | #pragma omp parallel for num_threads(quantize_num_threads) |
| | for (int i = 0; i < conv_bottom_blob_count; i++) |
| | { |
| | QuantBlobStat& stat = quant_blob_stats[i]; |
| |
|
| | stat.threshold = compute_aciq_gaussian_clip(stat.absmax, stat.total); |
| | float scale = 127 / stat.threshold; |
| |
|
| | bottom_blob_scales[i].create(1); |
| | bottom_blob_scales[i][0] = scale; |
| | } |
| |
|
| | return 0; |
| | } |
| |
|
| | static float cosine_similarity(const ncnn::Mat& a, const ncnn::Mat& b) |
| | { |
| | const int chanenls = a.c; |
| | const int size = a.w * a.h; |
| |
|
| | float sa = 0; |
| | float sb = 0; |
| | float sum = 0; |
| |
|
| | for (int p = 0; p < chanenls; p++) |
| | { |
| | const float* pa = a.channel(p); |
| | const float* pb = b.channel(p); |
| |
|
| | for (int i = 0; i < size; i++) |
| | { |
| | sa += pa[i] * pa[i]; |
| | sb += pb[i] * pb[i]; |
| | sum += pa[i] * pb[i]; |
| | } |
| | } |
| |
|
| | float sim = (float)sum / sqrt(sa) / sqrt(sb); |
| |
|
| | return sim; |
| | } |
| |
|
| | static int get_layer_param(const ncnn::Layer* layer, ncnn::ParamDict& pd) |
| | { |
| | if (layer->type == "Convolution") |
| | { |
| | ncnn::Convolution* convolution = (ncnn::Convolution*)layer; |
| |
|
| | pd.set(0, convolution->num_output); |
| | pd.set(1, convolution->kernel_w); |
| | pd.set(11, convolution->kernel_h); |
| | pd.set(2, convolution->dilation_w); |
| | pd.set(12, convolution->dilation_h); |
| | pd.set(3, convolution->stride_w); |
| | pd.set(13, convolution->stride_h); |
| | pd.set(4, convolution->pad_left); |
| | pd.set(15, convolution->pad_right); |
| | pd.set(14, convolution->pad_top); |
| | pd.set(16, convolution->pad_bottom); |
| | pd.set(18, convolution->pad_value); |
| | pd.set(5, convolution->bias_term); |
| | pd.set(6, convolution->weight_data_size); |
| | pd.set(8, convolution->int8_scale_term); |
| | pd.set(9, convolution->activation_type); |
| | pd.set(10, convolution->activation_params); |
| | } |
| | else if (layer->type == "ConvolutionDepthWise") |
| | { |
| | ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layer; |
| |
|
| | pd.set(0, convolutiondepthwise->num_output); |
| | pd.set(1, convolutiondepthwise->kernel_w); |
| | pd.set(11, convolutiondepthwise->kernel_h); |
| | pd.set(2, convolutiondepthwise->dilation_w); |
| | pd.set(12, convolutiondepthwise->dilation_h); |
| | pd.set(3, convolutiondepthwise->stride_w); |
| | pd.set(13, convolutiondepthwise->stride_h); |
| | pd.set(4, convolutiondepthwise->pad_left); |
| | pd.set(15, convolutiondepthwise->pad_right); |
| | pd.set(14, convolutiondepthwise->pad_top); |
| | pd.set(16, convolutiondepthwise->pad_bottom); |
| | pd.set(18, convolutiondepthwise->pad_value); |
| | pd.set(5, convolutiondepthwise->bias_term); |
| | pd.set(6, convolutiondepthwise->weight_data_size); |
| | pd.set(7, convolutiondepthwise->group); |
| | pd.set(8, convolutiondepthwise->int8_scale_term); |
| | pd.set(9, convolutiondepthwise->activation_type); |
| | pd.set(10, convolutiondepthwise->activation_params); |
| | } |
| | else if (layer->type == "InnerProduct") |
| | { |
| | ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layer; |
| |
|
| | pd.set(0, innerproduct->num_output); |
| | pd.set(1, innerproduct->bias_term); |
| | pd.set(2, innerproduct->weight_data_size); |
| | pd.set(8, innerproduct->int8_scale_term); |
| | pd.set(9, innerproduct->activation_type); |
| | pd.set(10, innerproduct->activation_params); |
| | } |
| | else |
| | { |
| | fprintf(stderr, "unexpected layer type %s in get_layer_param\n", layer->type.c_str()); |
| | return -1; |
| | } |
| |
|
| | return 0; |
| | } |
| |
|
| | static int get_layer_weights(const ncnn::Layer* layer, std::vector<ncnn::Mat>& weights) |
| | { |
| | if (layer->type == "Convolution") |
| | { |
| | ncnn::Convolution* convolution = (ncnn::Convolution*)layer; |
| | weights.push_back(convolution->weight_data); |
| | if (convolution->bias_term) |
| | weights.push_back(convolution->bias_data); |
| | } |
| | else if (layer->type == "ConvolutionDepthWise") |
| | { |
| | ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layer; |
| | weights.push_back(convolutiondepthwise->weight_data); |
| | if (convolutiondepthwise->bias_term) |
| | weights.push_back(convolutiondepthwise->bias_data); |
| | } |
| | else if (layer->type == "InnerProduct") |
| | { |
| | ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layer; |
| | weights.push_back(innerproduct->weight_data); |
| | if (innerproduct->bias_term) |
| | weights.push_back(innerproduct->bias_data); |
| | } |
| | else |
| | { |
| | fprintf(stderr, "unexpected layer type %s in get_layer_weights\n", layer->type.c_str()); |
| | return -1; |
| | } |
| |
|
| | return 0; |
| | } |
| |
|
| | int QuantNet::quantize_EQ() |
| | { |
| | |
| | quantize_KL(); |
| |
|
| | print_quant_info(); |
| |
|
| | const int input_blob_count = (int)input_blobs.size(); |
| | const int conv_layer_count = (int)conv_layers.size(); |
| | const int conv_bottom_blob_count = (int)conv_bottom_blobs.size(); |
| |
|
| | std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads); |
| | std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads); |
| |
|
| | |
| | const int image_count = std::min((int)listspaths[0].size(), 50); |
| |
|
| | const float scale_range_lower = 0.5f; |
| | const float scale_range_upper = 2.0f; |
| | const int search_steps = 100; |
| |
|
| | for (int i = 0; i < conv_layer_count; i++) |
| | { |
| | ncnn::Mat& weight_scale = weight_scales[i]; |
| | ncnn::Mat& bottom_blob_scale = bottom_blob_scales[i]; |
| |
|
| | const ncnn::Layer* layer = layers[conv_layers[i]]; |
| |
|
| | |
| | for (int j = 0; j < weight_scale.w; j++) |
| | { |
| | const float scale = weight_scale[j]; |
| | const float scale_lower = scale * scale_range_lower; |
| | const float scale_upper = scale * scale_range_upper; |
| | const float scale_step = (scale_upper - scale_lower) / search_steps; |
| |
|
| | std::vector<double> avgsims(search_steps, 0.0); |
| |
|
| | #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1) |
| | for (int ii = 0; ii < image_count; ii++) |
| | { |
| | if (ii % 100 == 0) |
| | { |
| | fprintf(stderr, "search weight scale %.2f%% [ %d / %d ] for %d / %d of %d / %d\n", ii * 100.f / image_count, ii, image_count, j, weight_scale.w, i, conv_layer_count); |
| | } |
| |
|
| | ncnn::Extractor ex = create_extractor(); |
| | ex.set_light_mode(true); |
| |
|
| | const int thread_num = ncnn::get_omp_thread_num(); |
| | ex.set_blob_allocator(&blob_allocators[thread_num]); |
| | ex.set_workspace_allocator(&workspace_allocators[thread_num]); |
| |
|
| | for (int jj = 0; jj < input_blob_count; jj++) |
| | { |
| | const int type_to_pixel = type_to_pixels[jj]; |
| | const std::vector<float>& mean_vals = means[jj]; |
| | const std::vector<float>& norm_vals = norms[jj]; |
| |
|
| | int pixel_convert_type = ncnn::Mat::PIXEL_BGR; |
| | if (type_to_pixel != pixel_convert_type) |
| | { |
| | pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT); |
| | } |
| |
|
| | ncnn::Mat in = read_and_resize_image(shapes[jj], listspaths[jj][ii], pixel_convert_type); |
| |
|
| | in.substract_mean_normalize(mean_vals.data(), norm_vals.data()); |
| |
|
| | ex.input(input_blobs[jj], in); |
| | } |
| |
|
| | ncnn::Mat in; |
| | ex.extract(conv_bottom_blobs[i], in); |
| |
|
| | ncnn::Mat out; |
| | ex.extract(conv_top_blobs[i], out); |
| |
|
| | ncnn::Layer* layer_int8 = ncnn::create_layer(layer->typeindex); |
| |
|
| | ncnn::ParamDict pd; |
| | get_layer_param(layer, pd); |
| | pd.set(8, 1); |
| | layer_int8->load_param(pd); |
| |
|
| | std::vector<float> sims(search_steps); |
| | for (int k = 0; k < search_steps; k++) |
| | { |
| | ncnn::Mat new_weight_scale = weight_scale.clone(); |
| | new_weight_scale[j] = scale_lower + k * scale_step; |
| |
|
| | std::vector<ncnn::Mat> weights; |
| | get_layer_weights(layer, weights); |
| | weights.push_back(new_weight_scale); |
| | weights.push_back(bottom_blob_scale); |
| | layer_int8->load_model(ncnn::ModelBinFromMatArray(weights.data())); |
| |
|
| | ncnn::Option opt_int8; |
| | opt_int8.use_packing_layout = false; |
| |
|
| | layer_int8->create_pipeline(opt_int8); |
| |
|
| | ncnn::Mat out_int8; |
| | layer_int8->forward(in, out_int8, opt_int8); |
| |
|
| | layer_int8->destroy_pipeline(opt_int8); |
| |
|
| | sims[k] = cosine_similarity(out, out_int8); |
| | } |
| |
|
| | delete layer_int8; |
| |
|
| | #pragma omp critical |
| | { |
| | for (int k = 0; k < search_steps; k++) |
| | { |
| | avgsims[k] += sims[k]; |
| | } |
| | } |
| | } |
| |
|
| | double max_avgsim = 0.0; |
| | float new_scale = scale; |
| |
|
| | |
| | for (int k = 0; k < search_steps; k++) |
| | { |
| | if (max_avgsim < avgsims[k]) |
| | { |
| | max_avgsim = avgsims[k]; |
| | new_scale = scale_lower + k * scale_step; |
| | } |
| | } |
| |
|
| | fprintf(stderr, "%s w %d = %f -> %f\n", layer->name.c_str(), j, scale, new_scale); |
| | weight_scale[j] = new_scale; |
| | } |
| |
|
| | |
| | for (int j = 0; j < bottom_blob_scale.w; j++) |
| | { |
| | const float scale = bottom_blob_scale[j]; |
| | const float scale_lower = scale * scale_range_lower; |
| | const float scale_upper = scale * scale_range_upper; |
| | const float scale_step = (scale_upper - scale_lower) / search_steps; |
| |
|
| | std::vector<double> avgsims(search_steps, 0.0); |
| |
|
| | #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1) |
| | for (int ii = 0; ii < image_count; ii++) |
| | { |
| | if (ii % 100 == 0) |
| | { |
| | fprintf(stderr, "search bottom blob scale %.2f%% [ %d / %d ] for %d / %d of %d / %d\n", ii * 100.f / image_count, ii, image_count, j, bottom_blob_scale.w, i, conv_layer_count); |
| | } |
| |
|
| | ncnn::Extractor ex = create_extractor(); |
| | ex.set_light_mode(true); |
| |
|
| | const int thread_num = ncnn::get_omp_thread_num(); |
| | ex.set_blob_allocator(&blob_allocators[thread_num]); |
| | ex.set_workspace_allocator(&workspace_allocators[thread_num]); |
| |
|
| | for (int jj = 0; jj < input_blob_count; jj++) |
| | { |
| | const int type_to_pixel = type_to_pixels[jj]; |
| | const std::vector<float>& mean_vals = means[jj]; |
| | const std::vector<float>& norm_vals = norms[jj]; |
| |
|
| | int pixel_convert_type = ncnn::Mat::PIXEL_BGR; |
| | if (type_to_pixel != pixel_convert_type) |
| | { |
| | pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT); |
| | } |
| |
|
| | ncnn::Mat in = read_and_resize_image(shapes[jj], listspaths[jj][ii], pixel_convert_type); |
| |
|
| | in.substract_mean_normalize(mean_vals.data(), norm_vals.data()); |
| |
|
| | ex.input(input_blobs[jj], in); |
| | } |
| |
|
| | ncnn::Mat in; |
| | ex.extract(conv_bottom_blobs[i], in); |
| |
|
| | ncnn::Mat out; |
| | ex.extract(conv_top_blobs[i], out); |
| |
|
| | ncnn::Layer* layer_int8 = ncnn::create_layer(layer->typeindex); |
| |
|
| | ncnn::ParamDict pd; |
| | get_layer_param(layer, pd); |
| | pd.set(8, 1); |
| | layer_int8->load_param(pd); |
| |
|
| | std::vector<float> sims(search_steps); |
| | for (int k = 0; k < search_steps; k++) |
| | { |
| | ncnn::Mat new_bottom_blob_scale = bottom_blob_scale.clone(); |
| | new_bottom_blob_scale[j] = scale_lower + k * scale_step; |
| |
|
| | std::vector<ncnn::Mat> weights; |
| | get_layer_weights(layer, weights); |
| | weights.push_back(weight_scale); |
| | weights.push_back(new_bottom_blob_scale); |
| | layer_int8->load_model(ncnn::ModelBinFromMatArray(weights.data())); |
| |
|
| | ncnn::Option opt_int8; |
| | opt_int8.use_packing_layout = false; |
| |
|
| | layer_int8->create_pipeline(opt_int8); |
| |
|
| | ncnn::Mat out_int8; |
| | layer_int8->forward(in, out_int8, opt_int8); |
| |
|
| | layer_int8->destroy_pipeline(opt_int8); |
| |
|
| | sims[k] = cosine_similarity(out, out_int8); |
| | } |
| |
|
| | delete layer_int8; |
| |
|
| | #pragma omp critical |
| | { |
| | for (int k = 0; k < search_steps; k++) |
| | { |
| | avgsims[k] += sims[k]; |
| | } |
| | } |
| | } |
| |
|
| | double max_avgsim = 0.0; |
| | float new_scale = scale; |
| |
|
| | |
| | for (int k = 0; k < search_steps; k++) |
| | { |
| | if (max_avgsim < avgsims[k]) |
| | { |
| | max_avgsim = avgsims[k]; |
| | new_scale = scale_lower + k * scale_step; |
| | } |
| | } |
| |
|
| | fprintf(stderr, "%s b %d = %f -> %f\n", layer->name.c_str(), j, scale, new_scale); |
| | bottom_blob_scale[j] = new_scale; |
| | } |
| |
|
| | |
| | QuantBlobStat& stat = quant_blob_stats[i]; |
| | stat.threshold = 127 / bottom_blob_scale[0]; |
| | } |
| |
|
| | return 0; |
| | } |
| |
|
// Returns the first whitespace-delimited token of `line`, or an empty string
// when the line holds nothing but whitespace.
static std::string first_path_token(const std::string& line)
{
    static const char* const whitespace = " \t\n\v\f\r";

    const std::string::size_type begin = line.find_first_not_of(whitespace);
    if (begin == std::string::npos)
        return std::string();

    const std::string::size_type end = line.find_first_of(whitespace, begin);
    if (end == std::string::npos)
        return line.substr(begin);

    return line.substr(begin, end - begin);
}

// Loads one list of file paths per comma separated list file named in `s`.
//
// `s` is split in place with strtok, so the buffer is modified. Every list file
// is read line by line; the first whitespace-delimited token of each line is
// taken as a path (anything after it on the line is ignored) and blank lines are
// skipped. Lines and paths may have any length.
//
// Returns one vector of paths per list file, in the order given. When a list
// file cannot be opened an error is printed and parsing stops, so the result
// then holds only the lists read before the failure.
static std::vector<std::vector<std::string> > parse_comma_path_list(char* s)
{
    std::vector<std::vector<std::string> > aps;

    char* pch = strtok(s, ",");
    while (pch != NULL)
    {
        FILE* fp = fopen(pch, "rb");
        if (!fp)
        {
            fprintf(stderr, "fopen %s failed\n", pch);
            break;
        }

        std::vector<std::string> paths;

        // fgets returns at most sizeof(chunk) - 1 bytes, so a long line arrives
        // in several chunks that are glued together until its newline shows up
        std::string line;
        char chunk[1024];
        while (fgets(chunk, sizeof(chunk), fp))
        {
            line += chunk;
            if (line.empty() || line[line.size() - 1] != '\n')
                continue;

            const std::string filepath = first_path_token(line);
            if (!filepath.empty())
                paths.push_back(filepath);

            line.clear();
        }

        // last line of a file that does not end with a newline
        const std::string filepath = first_path_token(line);
        if (!filepath.empty())
            paths.push_back(filepath);

        fclose(fp);

        aps.push_back(paths);

        pch = strtok(NULL, ",");
    }

    return aps;
}
| |
|
// Converts a decimal number string to float without going through the
// scanf/strtod family.
//
// Accepted form: [+|-] digits [. digits] [(e|E) [+|-] digits]
// Parsing stops at the first character that does not fit this form; whatever
// was recognised up to that point is converted (an input with no digits yields 0).
//
// vstr   NUL-terminated string; callers in this file pass buffers of at most
//        20 bytes, which keeps the 64-bit digit accumulators from overflowing.
static float vstr_to_float(const char vstr[20])
{
    // any exponent this large already drives the power of ten to infinity
    // (doubles end near 1e308), so bigger values cannot change the result
    const uint64_t max_exponent = 400;

    const char* p = vstr;

    // optional sign
    const bool sign = *p != '-';
    if (*p == '+' || *p == '-')
    {
        p++;
    }

    // integer part
    // isdigit is only defined for unsigned char values and EOF, hence the casts
    uint64_t v1 = 0;
    while (isdigit((unsigned char)*p))
    {
        v1 = v1 * 10 + (*p - '0');
        p++;
    }

    double v = (double)v1;

    // fraction: digits are collected as an integer and divided by 10^count
    if (*p == '.')
    {
        p++;

        uint64_t pow10 = 1;
        uint64_t v2 = 0;

        while (isdigit((unsigned char)*p))
        {
            v2 = v2 * 10 + (*p - '0');
            pow10 *= 10;
            p++;
        }

        v += v2 / (double)pow10;
    }

    // exponent
    if (*p == 'e' || *p == 'E')
    {
        p++;

        // sign of the exponent: multiply for '+', divide for '-'
        const bool fact = *p != '-';
        if (*p == '+' || *p == '-')
        {
            p++;
        }

        // saturate instead of growing without bound, otherwise the loops
        // below would spin for an absurd number of iterations
        uint64_t expon = 0;
        while (isdigit((unsigned char)*p))
        {
            expon = expon * 10 + (*p - '0');
            if (expon > max_exponent)
                expon = max_exponent;
            p++;
        }

        // build 10^expon, eight decades at a time first
        double scale = 1.0;
        while (expon >= 8)
        {
            scale *= 1e8;
            expon -= 8;
        }
        while (expon > 0)
        {
            scale *= 10.0;
            expon -= 1;
        }

        v = fact ? v * scale : v / scale;
    }

    return sign ? (float)v : (float)-v;
}
| |
|
| | static std::vector<std::vector<float> > parse_comma_float_array_list(char* s) |
| | { |
| | std::vector<std::vector<float> > aaf; |
| |
|
| | char* pch = strtok(s, "[]"); |
| | while (pch != NULL) |
| | { |
| | |
| | char vstr[20]; |
| | int nconsumed = 0; |
| | int nscan = sscanf(pch, "%19[^,]%n", vstr, &nconsumed); |
| | if (nscan == 1) |
| | { |
| | |
| | pch += nconsumed; |
| |
|
| | std::vector<float> af; |
| | float v = vstr_to_float(vstr); |
| | af.push_back(v); |
| |
|
| | nscan = sscanf(pch, ",%19[^,]%n", vstr, &nconsumed); |
| | while (nscan == 1) |
| | { |
| | pch += nconsumed; |
| |
|
| | float v = vstr_to_float(vstr); |
| | af.push_back(v); |
| |
|
| | nscan = sscanf(pch, ",%19[^,]%n", vstr, &nconsumed); |
| | } |
| |
|
| | |
| | aaf.push_back(af); |
| | } |
| |
|
| | pch = strtok(NULL, "[]"); |
| | } |
| |
|
| | return aaf; |
| | } |
| |
|
// Parses a list of bracketed int arrays such as "[224,224,3],[0,0]".
//
// `s` is split in place on '[' and ']' with strtok, so the buffer is modified.
// Every piece that starts with an integer becomes one array; pieces that do not
// (the commas between two arrays) are skipped.
static std::vector<std::vector<int> > parse_comma_int_array_list(char* s)
{
    std::vector<std::vector<int> > arrays;

    for (char* token = strtok(s, "[]"); token != NULL; token = strtok(NULL, "[]"))
    {
        int value = 0;
        int advance = 0;
        const char* cursor = token;

        // the first value has no leading comma
        if (sscanf(cursor, "%d%n", &value, &advance) != 1)
            continue;

        std::vector<int> values(1, value);
        cursor += advance;

        // every following value is introduced by a comma
        while (sscanf(cursor, ",%d%n", &value, &advance) == 1)
        {
            values.push_back(value);
            cursor += advance;
        }

        arrays.push_back(values);
    }

    return arrays;
}
| |
|
| | static std::vector<int> parse_comma_pixel_type_list(char* s) |
| | { |
| | std::vector<int> aps; |
| |
|
| | char* pch = strtok(s, ","); |
| | while (pch != NULL) |
| | { |
| | |
| | if (strcmp(pch, "RAW") == 0) |
| | aps.push_back(-233); |
| | if (strcmp(pch, "RGB") == 0) |
| | aps.push_back(ncnn::Mat::PIXEL_RGB); |
| | if (strcmp(pch, "BGR") == 0) |
| | aps.push_back(ncnn::Mat::PIXEL_BGR); |
| | if (strcmp(pch, "GRAY") == 0) |
| | aps.push_back(ncnn::Mat::PIXEL_GRAY); |
| | if (strcmp(pch, "RGBA") == 0) |
| | aps.push_back(ncnn::Mat::PIXEL_RGBA); |
| | if (strcmp(pch, "BGRA") == 0) |
| | aps.push_back(ncnn::Mat::PIXEL_BGRA); |
| |
|
| | pch = strtok(NULL, ","); |
| | } |
| |
|
| | return aps; |
| | } |
| |
|
// Writes a list of float arrays to stderr as "[a,b,c],[d,e]" using "%f" for
// every value. No trailing newline is written.
static void print_float_array_list(const std::vector<std::vector<float> >& list)
{
    for (size_t row = 0; row < list.size(); row++)
    {
        // separator goes in front of every array except the first
        if (row != 0)
            fprintf(stderr, ",");

        const std::vector<float>& values = list[row];

        fprintf(stderr, "[");
        for (size_t col = 0; col < values.size(); col++)
        {
            if (col != 0)
                fprintf(stderr, ",");
            fprintf(stderr, "%f", values[col]);
        }
        fprintf(stderr, "]");
    }
}
| |
|
// Writes a list of int arrays to stderr as "[a,b,c],[d,e]". No trailing newline
// is written.
static void print_int_array_list(const std::vector<std::vector<int> >& list)
{
    for (size_t row = 0; row < list.size(); row++)
    {
        // separator goes in front of every array except the first
        if (row != 0)
            fprintf(stderr, ",");

        const std::vector<int>& values = list[row];

        fprintf(stderr, "[");
        for (size_t col = 0; col < values.size(); col++)
        {
            if (col != 0)
                fprintf(stderr, ",");
            fprintf(stderr, "%d", values[col]);
        }
        fprintf(stderr, "]");
    }
}
| |
|
| | static void print_pixel_type_list(const std::vector<int>& list) |
| | { |
| | for (size_t i = 0; i < list.size(); i++) |
| | { |
| | const int type = list[i]; |
| | if (type == -233) |
| | fprintf(stderr, "RAW"); |
| | if (type == ncnn::Mat::PIXEL_RGB) |
| | fprintf(stderr, "RGB"); |
| | if (type == ncnn::Mat::PIXEL_BGR) |
| | fprintf(stderr, "BGR"); |
| | if (type == ncnn::Mat::PIXEL_GRAY) |
| | fprintf(stderr, "GRAY"); |
| | if (type == ncnn::Mat::PIXEL_RGBA) |
| | fprintf(stderr, "RGBA"); |
| | if (type == ncnn::Mat::PIXEL_BGRA) |
| | fprintf(stderr, "BGRA"); |
| | if (i != list.size() - 1) |
| | fprintf(stderr, ","); |
| | } |
| | } |
| |
|
// Prints the command line synopsis, the supported key=value options and a
// sample invocation to stderr.
static void show_usage()
{
    static const char* const usage_lines[] = {
        "Usage: ncnn2table [ncnnparam] [ncnnbin] [list,...] [ncnntable] [(key=value)...]\n",
        " mean=[104.0,117.0,123.0],...\n",
        " norm=[1.0,1.0,1.0],...\n",
        " shape=[224,224,3],...[w,h,c] or [w,h] **[0,0] will not resize\n",
        " pixel=RAW/RGB/BGR/GRAY/RGBA/BGRA,...\n",
        " thread=8\n",
        " method=kl/aciq/eq\n",
        "Sample usage: ncnn2table squeezenet.param squeezenet.bin imagelist.txt squeezenet.table mean=[104.0,117.0,123.0] norm=[1.0,1.0,1.0] shape=[227,227,3] pixel=BGR method=kl\n",
    };

    const size_t line_count = sizeof(usage_lines) / sizeof(usage_lines[0]);
    for (size_t i = 0; i < line_count; i++)
    {
        fputs(usage_lines[i], stderr);
    }
}
| |
|
| | int main(int argc, char** argv) |
| | { |
| | if (argc < 5) |
| | { |
| | show_usage(); |
| | return -1; |
| | } |
| |
|
| | for (int i = 1; i < argc; i++) |
| | { |
| | if (argv[i][0] == '-') |
| | { |
| | show_usage(); |
| | return -1; |
| | } |
| | } |
| |
|
| | const char* inparam = argv[1]; |
| | const char* inbin = argv[2]; |
| | char* lists = argv[3]; |
| | const char* outtable = argv[4]; |
| |
|
| | ncnn::Option opt; |
| | opt.num_threads = 1; |
| | opt.lightmode = false; |
| | opt.use_fp16_packed = false; |
| | opt.use_fp16_storage = false; |
| | opt.use_fp16_arithmetic = false; |
| |
|
| | QuantNet net; |
| | net.opt = opt; |
| | net.load_param(inparam); |
| | net.load_model(inbin); |
| |
|
| | net.init(); |
| |
|
| | |
| | net.listspaths = parse_comma_path_list(lists); |
| |
|
| | std::string method = "kl"; |
| |
|
| | for (int i = 5; i < argc; i++) |
| | { |
| | |
| | char* kv = argv[i]; |
| |
|
| | char* eqs = strchr(kv, '='); |
| | if (eqs == NULL) |
| | { |
| | fprintf(stderr, "unrecognized arg %s\n", kv); |
| | continue; |
| | } |
| |
|
| | |
| | eqs[0] = '\0'; |
| | const char* key = kv; |
| | char* value = eqs + 1; |
| |
|
| | |
| | if (memcmp(key, "mean", 4) == 0) |
| | net.means = parse_comma_float_array_list(value); |
| | if (memcmp(key, "norm", 4) == 0) |
| | net.norms = parse_comma_float_array_list(value); |
| | if (memcmp(key, "shape", 5) == 0) |
| | net.shapes = parse_comma_int_array_list(value); |
| | if (memcmp(key, "pixel", 5) == 0) |
| | net.type_to_pixels = parse_comma_pixel_type_list(value); |
| | if (memcmp(key, "thread", 6) == 0) |
| | net.quantize_num_threads = atoi(value); |
| | if (memcmp(key, "method", 6) == 0) |
| | method = std::string(value); |
| | } |
| |
|
| | |
| | const size_t input_blob_count = net.input_blobs.size(); |
| | if (net.listspaths.size() != input_blob_count) |
| | { |
| | fprintf(stderr, "expect %d lists, but got %d\n", (int)input_blob_count, (int)net.listspaths.size()); |
| | return -1; |
| | } |
| | if (net.means.size() != input_blob_count) |
| | { |
| | fprintf(stderr, "expect %d means, but got %d\n", (int)input_blob_count, (int)net.means.size()); |
| | return -1; |
| | } |
| | if (net.norms.size() != input_blob_count) |
| | { |
| | fprintf(stderr, "expect %d norms, but got %d\n", (int)input_blob_count, (int)net.norms.size()); |
| | return -1; |
| | } |
| | if (net.shapes.size() != input_blob_count) |
| | { |
| | fprintf(stderr, "expect %d shapes, but got %d\n", (int)input_blob_count, (int)net.shapes.size()); |
| | return -1; |
| | } |
| | if (net.type_to_pixels.size() != input_blob_count) |
| | { |
| | fprintf(stderr, "expect %d pixels, but got %d\n", (int)input_blob_count, (int)net.type_to_pixels.size()); |
| | return -1; |
| | } |
| | if (net.quantize_num_threads < 0) |
| | { |
| | fprintf(stderr, "malformed thread %d\n", net.quantize_num_threads); |
| | return -1; |
| | } |
| |
|
| | |
| | { |
| | fprintf(stderr, "mean = "); |
| | print_float_array_list(net.means); |
| | fprintf(stderr, "\n"); |
| | fprintf(stderr, "norm = "); |
| | print_float_array_list(net.norms); |
| | fprintf(stderr, "\n"); |
| | fprintf(stderr, "shape = "); |
| | print_int_array_list(net.shapes); |
| | fprintf(stderr, "\n"); |
| | fprintf(stderr, "pixel = "); |
| | print_pixel_type_list(net.type_to_pixels); |
| | fprintf(stderr, "\n"); |
| | fprintf(stderr, "thread = %d\n", net.quantize_num_threads); |
| | fprintf(stderr, "method = %s\n", method.c_str()); |
| | fprintf(stderr, "---------------------------------------\n"); |
| | } |
| |
|
| | if (method == "kl") |
| | { |
| | net.quantize_KL(); |
| | } |
| | else if (method == "aciq") |
| | { |
| | net.quantize_ACIQ(); |
| | } |
| | else if (method == "eq") |
| | { |
| | net.quantize_EQ(); |
| | } |
| | else |
| | { |
| | fprintf(stderr, "not implemented yet !\n"); |
| | fprintf(stderr, "unknown method %s, expect kl / aciq / eq\n", method.c_str()); |
| | return -1; |
| | } |
| |
|
| | net.print_quant_info(); |
| |
|
| | net.save_table(outtable); |
| |
|
| | return 0; |
| | } |
| |
|