// File size: 2,379 Bytes | d1d4335
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
// This file defines common utilities used in code compiled with avx2/avx512
// flags.
#include <cstddef>
#include <cstdint>
#include <string>
namespace fbgemm {
/**
 * @brief Granularity options for quantization.
 *
 * NOTE(review): this header does not define what each enumerator means; the
 * per-value notes below are inferred from the names only and should be
 * confirmed against the code that consumes this enum.
 */
enum class FBGEMM_ENUM_CLASS_API QuantizationGranularity {
// presumably one set of quantization parameters for the whole tensor — confirm
TENSOR,
// presumably one set of quantization parameters per group — confirm
GROUP,
// presumably one set of quantization parameters per output channel — confirm
OUT_CHANNEL,
};
/**
 * @brief Describes a block of a matrix by a starting row/column and a
 * row/column extent.
 */
struct FBGEMM_API block_type_t {
  int row_start; // first row of the block
  int row_size; // number of rows in the block
  int col_start; // first column of the block
  int col_size; // number of columns in the block

  /**
   * @brief Render the four fields as human-readable text.
   *
   * @return A string of the form
   *         "row start:R, row size:RS, col start:C, col size:CS".
   */
  std::string toString() const {
    std::string text("row start:");
    text.append(std::to_string(row_start)).append(", ");
    text.append("row size:").append(std::to_string(row_size)).append(", ");
    text.append("col start:").append(std::to_string(col_start)).append(", ");
    text.append("col size:").append(std::to_string(col_size));
    return text;
  }
};
/**
 * @brief A struct to represent all the requantization parameters.
 *
 * Please note that this is different from RequantizationParams in
 * QuantUtilsAvx2.h as it combines all the parameters needed for various
 * quantization granularities.
 *
 * All pointer members are pointers-to-const, so the pointed-to data cannot be
 * modified through this struct. No member has a default initializer, so a
 * default-initialized object with automatic storage holds indeterminate
 * values until every field is assigned.
 *
 * NOTE(review): this header does not state who owns the pointed-to arrays,
 * how long they must stay valid, or how many elements each one holds; the
 * per-field notes below are inferred from the names — confirm with callers.
 *
 * @tparam BIAS_TYPE Element type of the bias array; defaults to std::int32_t.
 */
template <typename BIAS_TYPE = std::int32_t>
struct requantizationParams_t {
// Exposes the template argument as a member type.
using BIAS_T = BIAS_TYPE;
// presumably the zero point of the A operand (single value) — confirm
std::int32_t A_zero_point;
// presumably the zero point(s) of the B operand — confirm
const std::int32_t* B_zero_point;
// presumably the zero point of the C (output) operand — confirm
std::int32_t C_zero_point;
// presumably the requantization multiplier(s) applied to produce C — confirm
const float* C_multiplier;
// presumably precomputed row offsets — confirm
const std::int32_t* row_offsets;
// presumably precomputed column offsets — confirm
const std::int32_t* col_offsets;
// Bias values of type BIAS_T; NOTE(review): confirm whether null is allowed.
const BIAS_T* bias;
// presumably the number of columns covered by the arrays above — confirm
std::uint32_t ncols;
// presumably the number of groups (cf. QuantizationGranularity::GROUP) — confirm
int groups;
// presumably activation scale times weight scale — TODO confirm when it is used
const float* act_times_w_scale;
};
/**
 * @brief A struct to represent all the parameters for requantizing for floats.
 *
 * All pointer members are pointers-to-const, so the pointed-to data cannot be
 * modified through this struct. No member has a default initializer, so a
 * default-initialized object with automatic storage holds indeterminate
 * values until every field is assigned.
 *
 * NOTE(review): this header does not state who owns the pointed-to arrays,
 * how long they must stay valid, or how many elements each one holds; the
 * per-field notes below are inferred from the names — confirm with callers.
 */
struct requantizationForFloatParams_t {
// presumably the zero point of the A operand (single value) — confirm
std::int32_t A_zero_point;
// presumably the zero point(s) of the B operand — confirm
const std::int32_t* B_zero_point;
// presumably the scale of the A operand (single value) — confirm
float A_scale;
// presumably the scale(s) of the B operand — confirm
const float* B_scale;
// presumably precomputed row offsets — confirm
const std::int32_t* row_offsets;
// presumably precomputed column offsets — confirm
const std::int32_t* col_offsets;
// Float bias values; NOTE(review): confirm whether null is allowed.
const float* bias;
// presumably the number of columns covered by the arrays above — confirm
std::uint32_t ncols;
// presumably the number of groups (cf. QuantizationGranularity::GROUP) — confirm
int groups;
};
/**
 * @brief Allocate size bytes of uninitialized storage whose alignment is
 * specified by align.
 *
 * @param align Requested alignment of the returned storage.
 * @param size Number of bytes to allocate.
 * @param raiseException Defaults to false. NOTE(review): the definition is
 *        not part of this header; presumably this selects throwing on
 *        allocation failure instead of returning a null pointer — confirm.
 * @return Pointer to the allocated storage; release it with
 *         fbgemmAlignedFree.
 */
FBGEMM_API void* fbgemmAlignedAlloc(
    std::size_t align,
    std::size_t size,
    bool raiseException = false);
/**
 * @brief Free memory allocated by fbgemmAlignedAlloc.
 *
 * @param p Pointer previously returned by fbgemmAlignedAlloc.
 *          NOTE(review): whether a null pointer is accepted is not visible
 *          from this declaration — confirm against the definition.
 */
FBGEMM_API void fbgemmAlignedFree(void* p);
} // namespace fbgemm
|