|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma once |
|
|
|
|
|
#include <cstddef> // std::size_t (used by the rowwise-quantization declarations)
#include <cstdint>

#include "./FbgemmBuild.h"
#include "./UtilsAvx2.h"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
namespace fbgemm { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Parameters of an affine quantization scheme, i.e.
///   real_value ~= scale * (quantized_value - zero_point)
struct FBGEMM_API TensorQuantizationParams {
  float scale;            // step size between adjacent quantized values
  std::int32_t zero_point; // quantized value that represents real 0.0
  int precision;          // number of bits in the quantized representation

  /// Smallest real value representable with these parameters
  /// (defined out of line).
  float Min() const;

  /// Largest real value representable with these parameters
  /// (defined out of line).
  float Max() const;
};
|
|
|
|
|
|
|
|
|
|
|
/// Parameters for requantizing 32-bit accumulators down to the
/// target quantized representation.
struct FBGEMM_API RequantizationParams {
  /// Real-valued requantization multiplier, used by the floating-point
  /// requantization path (see RequantizeAvx2).
  float real_multiplier;

  /// Fixed-point approximation of real_multiplier, used together with
  /// right_shift by the fixed-point path (see RequantizeFixedPointAvx2).
  /// Presumably real_multiplier ~= multiplier * 2^(-right_shift) — confirm
  /// against the code that computes these fields.
  std::int32_t multiplier;
  int right_shift;

  /// Quantization parameters of the requantized output.
  TensorQuantizationParams target_qparams;
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <typename T = std::uint8_t, bool LEGACY = true> |
|
|
void QuantizeAvx2( |
|
|
const float* src, |
|
|
T* dst, |
|
|
int64_t len, |
|
|
const TensorQuantizationParams& qparams); |
|
|
|
|
|
/// Quantize then immediately dequantize (fake-quantize) a float buffer,
/// writing the result back as floats — typically used to simulate
/// quantization error during training.
///
/// @param src         input buffer of `len` floats
/// @param dst         output buffer of `len` fake-quantized floats
/// @param len         number of elements
/// @param qparams     quantization parameters to apply
/// @param noise_ratio presumably controls a fraction of values passed
///                    through unmodified — confirm at the definition
template <typename T = std::uint8_t>
void FusedQuantizeDequantizeAvx2(
    const float* src,
    float* dst,
    int len,
    const TensorQuantizationParams& qparams,
    float noise_ratio = 0.0f);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uint32_t FBGEMM_API Xor128(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void FBGEMM_API FindMinMax(const float* m, float* min, float* max, int64_t len); |
|
|
|
|
|
/// Requantize `len` 32-bit accumulators to uint8 using the fixed-point
/// path (params.multiplier / params.right_shift).
/// NOTE(review): unlike most declarations in this header, this one is not
/// marked FBGEMM_API — confirm whether it is intentionally not exported.
void RequantizeFixedPointAvx2(
    const std::int32_t* src,
    std::uint8_t* dst,
    int len,
    const RequantizationParams& params);
|
|
|
|
|
/// Requantize `len` 32-bit accumulators to uint8, presumably using the
/// floating-point path (params.real_multiplier) — confirm at the
/// definition.
/// NOTE(review): not marked FBGEMM_API, unlike most siblings here —
/// confirm whether it is intentionally not exported.
void RequantizeAvx2(
    const std::int32_t* src,
    std::uint8_t* dst,
    int len,
    const RequantizationParams& params);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Post-GEMM output pipeline (AVX2 per the name): requantize a block of
/// 32-bit accumulators to uint8.
///
/// Template parameters (semantics inferred from naming — confirm at the
/// definition):
/// @tparam A_SYMMETRIC presumably A's zero point is 0 (correction skipped)
/// @tparam B_SYMMETRIC presumably B's zero point is 0 (correction skipped)
/// @tparam Q_GRAN      quantization granularity (see QuantizationGranularity)
/// @tparam HAS_BIAS    whether a bias of BIAS_TYPE is added
/// @tparam FUSE_RELU   whether ReLU is fused into the requantization
/// @tparam BIAS_TYPE   element type of the bias vector
/// @tparam DIRECT      presumably selects a direct-convolution variant
///
/// @param out    uint8 output, leading dimension ld_out
/// @param inp    int32 accumulators, leading dimension ld_in
/// @param block  sub-block (offsets/sizes) of the output to process
/// @param r      requantization parameters
template <
    bool A_SYMMETRIC,
    bool B_SYMMETRIC,
    QuantizationGranularity Q_GRAN,
    bool HAS_BIAS,
    bool FUSE_RELU,
    typename BIAS_TYPE = std::int32_t,
    bool DIRECT = false>
FBGEMM_API void requantizeOutputProcessingAvx2(
    std::uint8_t* out,
    const std::int32_t* inp,
    const block_type_t& block,
    int ld_out,
    int ld_in,
    const requantizationParams_t<BIAS_TYPE>& r);
|
|
|
|
|
/// Grouped-convolution variant of requantizeOutputProcessingAvx2:
/// requantize a block of 32-bit accumulators to uint8.
///
/// Template parameters mirror requantizeOutputProcessingAvx2, plus:
/// @tparam C_PER_G presumably channels per group — confirm at the
///                 definition
template <
    bool A_SYMMETRIC,
    bool B_SYMMETRIC,
    QuantizationGranularity Q_GRAN,
    bool HAS_BIAS,
    bool FUSE_RELU,
    int C_PER_G,
    typename BIAS_TYPE = std::int32_t>
FBGEMM_API void requantizeOutputProcessingGConvAvx2(
    std::uint8_t* out,
    const std::int32_t* inp,
    const block_type_t& block,
    int ld_out,
    int ld_in,
    const requantizationParams_t<BIAS_TYPE>& r);
|
|
|
|
|
/// Like requantizeOutputProcessingAvx2, but produces float output instead
/// of uint8: converts a block of 32-bit accumulators back to real values.
///
/// Template parameters mirror requantizeOutputProcessingAvx2 (see that
/// declaration); note there is no BIAS_TYPE/DIRECT here and the params
/// struct is requantizationForFloatParams_t.
template <
    bool A_SYMMETRIC,
    bool B_SYMMETRIC,
    QuantizationGranularity Q_GRAN,
    bool HAS_BIAS,
    bool FUSE_RELU>
FBGEMM_API void requantizeForFloatAvx2(
    float* out,
    const std::int32_t* inp,
    const block_type_t& block,
    int ld_out,
    int ld_in,
    const requantizationForFloatParams_t& r);
|
|
|
|
|
/// Row-wise quantize a float-or-half matrix into the fused BIT_RATE-bit
/// rowwise format whose per-row Scale/Bias are stored as half precision
/// (per the SBHalf naming — confirm exact layout at the definition).
///
/// @tparam InputType element type of `input` (float or half)
/// @tparam BIT_RATE  bits per quantized element
/// @param input      input matrix, input_rows x input_columns
/// @param output     packed quantized output buffer
template <typename InputType, int BIT_RATE>
void FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfAvx2(
    const InputType* input,
    // std::size_t: guaranteed by <cstddef>; the unqualified global-namespace
    // size_t is not guaranteed by the headers this file includes.
    std::size_t input_rows,
    int input_columns,
    std::uint8_t* output);
|
|
|
|
|
/// Row-wise quantize a float-or-half matrix into the fused 8-bit rowwise
/// format whose per-row Scale/Bias are stored as float (per the SBFloat
/// naming — confirm exact layout at the definition).
///
/// @param rowwise_min_max optional precomputed per-row min/max values;
///        when nullptr they are presumably computed internally — confirm
///        at the definition
template <typename InputType>
void FloatOrHalfToFused8BitRowwiseQuantizedSBFloatAvx2(
    const InputType* input,
    // std::size_t: guaranteed by <cstddef>, unlike the unqualified spelling.
    std::size_t input_rows,
    int input_columns,
    std::uint8_t* output,
    const InputType* rowwise_min_max = nullptr);
|
|
|
|
|
/// Inverse of FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfAvx2:
/// dequantize a fused BIT_RATE-bit rowwise matrix (half-precision per-row
/// scale/bias) back to float or half output.
///
/// @tparam OutputType element type of `output` (float or half)
/// @tparam BIT_RATE   bits per quantized element in `input`
template <typename OutputType, int BIT_RATE>
void FusedNBitRowwiseQuantizedSBHalfToFloatOrHalfAvx2(
    const std::uint8_t* input,
    // std::size_t: guaranteed by <cstddef>, unlike the unqualified spelling.
    std::size_t input_rows,
    int input_columns,
    OutputType* output);
|
|
|
|
|
/// Inverse of FloatOrHalfToFused8BitRowwiseQuantizedSBFloatAvx2:
/// dequantize a fused 8-bit rowwise matrix (float per-row scale/bias)
/// back to float or half output.
///
/// @tparam OutputType element type of `output` (float or half)
template <typename OutputType>
void Fused8BitRowwiseQuantizedSBFloatToFloatOrHalfAvx2(
    const std::uint8_t* input,
    // std::size_t: guaranteed by <cstddef>, unlike the unqualified spelling.
    std::size_t input_rows,
    int input_columns,
    OutputType* output);

} // namespace fbgemm
|
|
|