|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
|
|
|
|
#include <cstddef>
#include <cstdint>
#include <string>
|
|
|
|
|
|
namespace fbgemm {
|
|
|
|
|
|
/**
 * @brief Tag type naming a quantization granularity.
 *
 * No enumerator has an explicit value, so they are numbered 0, 1, 2 in
 * declaration order; keep the order stable if those values are ever
 * serialized or used as indices.
 *
 * NOTE(review): the per-enumerator descriptions below are inferred from the
 * names alone -- confirm against the code that switches on this enum.
 */
enum class FBGEMM_ENUM_CLASS_API QuantizationGranularity {
  TENSOR, ///< presumably one set of quantization parameters per tensor
  GROUP, ///< presumably one set of quantization parameters per group
  OUT_CHANNEL, ///< presumably one set per output channel
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Plain aggregate of four ints: a row start/size pair and a column
 * start/size pair.
 *
 * The members have no default initializers, so a default-constructed
 * instance holds indeterminate values until each field is assigned.
 *
 * NOTE(review): presumably describes a rectangular sub-block of a matrix
 * (first row/column plus extent) -- confirm units and conventions with the
 * callers.
 */
struct FBGEMM_API block_type_t {
  int row_start;
  int row_size;
  int col_start;
  int col_size;

  /**
   * @brief Renders the four fields as human-readable text.
   * @return A string of the exact form
   *         "row start:<row_start>, row size:<row_size>, "
   *         "col start:<col_start>, col size:<col_size>".
   */
  std::string toString() const {
    return "row start:" + std::to_string(row_start) +
        ", row size:" + std::to_string(row_size) +
        ", col start:" + std::to_string(col_start) +
        ", col size:" + std::to_string(col_size);
  }
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Aggregate bundling the inputs of a requantization step.
 *
 * The struct declares no constructors or destructor: it only carries the
 * values and pointers it is given and never allocates or releases anything
 * itself. Members have no default initializers, so callers must set every
 * field they rely on (e.g. via aggregate initialization in declaration
 * order).
 *
 * NOTE(review): the per-field descriptions are inferred from the member
 * names; array lengths (per tensor / group / column) are not visible in this
 * header -- confirm against the kernels that read this struct.
 *
 * @tparam BIAS_TYPE element type of the bias array; defaults to std::int32_t.
 */
template <typename BIAS_TYPE = std::int32_t>
struct requantizationParams_t {
  /// Re-exports the template argument so users can name the bias type.
  using BIAS_T = BIAS_TYPE;

  std::int32_t A_zero_point; ///< zero point of A (single value)
  const std::int32_t* B_zero_point; ///< zero point(s) of B
  std::int32_t C_zero_point; ///< zero point of C (single value)
  const float* C_multiplier; ///< output multiplier(s)
  const std::int32_t* row_offsets; ///< presumably per-row offsets of A
  const std::int32_t* col_offsets; ///< presumably per-column offsets of B
  const BIAS_T* bias; ///< bias values of type BIAS_T
  std::uint32_t ncols; ///< number of columns
  int groups; ///< number of groups
  const float* act_times_w_scale; ///< presumably activation * weight scale(s)
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Aggregate bundling the inputs of a requantization step that works
 * with float scales and a float bias.
 *
 * The struct declares no constructors or destructor: it only carries the
 * values and pointers it is given and never allocates or releases anything
 * itself. Members have no default initializers, so callers must set every
 * field they rely on (e.g. via aggregate initialization in declaration
 * order).
 *
 * NOTE(review): the per-field descriptions are inferred from the member
 * names; array lengths are not visible in this header -- confirm against the
 * kernels that read this struct.
 */
struct requantizationForFloatParams_t {
  std::int32_t A_zero_point; ///< zero point of A (single value)
  const std::int32_t* B_zero_point; ///< zero point(s) of B
  float A_scale; ///< scale of A (single value)
  const float* B_scale; ///< scale(s) of B
  const std::int32_t* row_offsets; ///< presumably per-row offsets of A
  const std::int32_t* col_offsets; ///< presumably per-column offsets of B
  const float* bias; ///< float bias values
  std::uint32_t ncols; ///< number of columns
  int groups; ///< number of groups
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Declaration of the library's aligned-allocation routine; the
 * definition is not part of this header.
 *
 * NOTE(review): everything below except the default value is inferred from
 * the name and signature -- confirm against the implementation.
 *
 * @param align presumably the required alignment in bytes.
 * @param size presumably the number of bytes to allocate.
 * @param raiseException defaults to false; presumably selects between
 *        throwing and returning a null pointer when allocation fails.
 * @return Untyped pointer to the allocation; presumably to be released with
 *         fbgemmAlignedFree.
 */
FBGEMM_API void* fbgemmAlignedAlloc(
    std::size_t align,
    std::size_t size,
    bool raiseException = false);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Declaration only; the definition is not part of this header.
 *
 * NOTE(review): presumably releases memory previously obtained from
 * fbgemmAlignedAlloc -- confirm, including whether a null @p p is accepted.
 *
 * @param p pointer to hand back to the library.
 */
FBGEMM_API void fbgemmAlignedFree(void* p);
|
|
|
|
|
|
}
|
|
|
|