|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma once |
|
|
|
|
|
#include <cstdint> |
|
|
#include <memory> |
|
|
#include <vector> |
|
|
|
|
|
#include "fbgemm/FbgemmBuild.h" |
|
|
#include "fbgemm/UtilsAvx2.h" |
|
|
#include "fbgemm/spmmUtilsAvx2.h" |
|
|
|
|
|
namespace fbgemm { |
|
|
|
|
|
template <typename T> |
|
|
struct FBGEMM_API CSRMatrix { |
|
|
std::vector<int> rowPtr; |
|
|
std::vector<int> colIdx; |
|
|
std::vector<T> values; |
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <typename T = std::int8_t, int ROW_BLOCK = 1, int COL_BLOCK = 4> |
|
|
struct FBGEMM_API BCSRMatrix { |
|
|
using DTYPE = T; |
|
|
static constexpr int RB = ROW_BLOCK; |
|
|
static constexpr int CB = COL_BLOCK; |
|
|
|
|
|
|
|
|
static constexpr int COLTILE = 4000; |
|
|
std::vector<int> rowBPtr; |
|
|
std::vector<int> colBIdx; |
|
|
std::vector<DTYPE> values; |
|
|
|
|
|
std::vector<int32_t> row_offsets; |
|
|
int R; |
|
|
int C; |
|
|
|
|
|
BCSRMatrix(int Rows, int Cols) { |
|
|
R = Rows; |
|
|
C = Cols; |
|
|
row_offsets.resize(R, 0); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void pack(const DTYPE* src, size_t ld); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void pack(const DTYPE* src); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void unpack(DTYPE* dst, size_t ld); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void unpack(DTYPE* dst); |
|
|
}; |
|
|
|
|
|
template <typename T> |
|
|
FBGEMM_API std::unique_ptr<CSRMatrix<T>> |
|
|
fbgemmDenseToCSR(int R, int C, const T* inp, int ld); |
|
|
|
|
|
template <typename T> |
|
|
FBGEMM_API std::unique_ptr<CSRMatrix<T>> |
|
|
fbgemmDenseToCSR(int R, int C, const T* inp); |
|
|
|
|
|
template <typename T = std::int8_t, int RB = 1, int CB = 4> |
|
|
FBGEMM_API std::unique_ptr<BCSRMatrix<T, RB, CB>> |
|
|
fbgemmDenseToBCSR(int R, int C, const T* inp, int ld); |
|
|
|
|
|
template <typename T = std::int8_t, int RB = 1, int CB = 4> |
|
|
FBGEMM_API std::unique_ptr<BCSRMatrix<T, RB, CB>> |
|
|
fbgemmDenseToBCSR(int R, int C, const T* inp); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Exported float sparse-times-dense entry point (declaration only).
 *
 * NOTE(review): the parameter names suggest that row_ptr/col_idx/values
 * describe a sparse matrix in compressed-sparse-row form, B is the dense
 * operand with leading dimension ldb, and C is the output with leading
 * dimension ldc. M and N are presumably the sparse row count and the dense
 * column count. None of this is established by the header -- confirm against
 * the definition.
 *
 * @param accum defaults to false. Presumably selects accumulating into C
 *              instead of overwriting it -- TODO confirm.
 */
FBGEMM_API void SparseDenseMM(
    int M, int N,
    const int* row_ptr, const int* col_idx, const float* values,
    const float* B, int ldb,
    float* C, int ldc,
    bool accum = false);
|
|
|
|
|
template <bool FUSE_RELU, QuantizationGranularity Q_GRAN> |
|
|
FBGEMM_API void fbgemmSparseDenseInt8MM( |
|
|
int N, |
|
|
const std::unique_ptr<BCSRMatrix<>>& bcsr, |
|
|
const uint8_t* B, |
|
|
int ldb, |
|
|
int32_t* C_i32, |
|
|
uint8_t* C_u8, |
|
|
int ldc, |
|
|
trRequantizationParams_t& rParams, |
|
|
bool accum = false, |
|
|
int thread_id = 0, |
|
|
int num_threads = 1); |
|
|
|
|
|
namespace internal { |
|
|
|
|
|
/**
 * @brief Internal declaration with the same parameter list and default as
 *        the public SparseDenseMM.
 *
 * NOTE(review): by its name this is the AVX2 implementation that the public
 * entry point dispatches to -- confirm against the definition. Unlike the
 * AVX-512 declarations that follow, it sits outside the aarch64
 * preprocessor guard.
 *
 * @param accum defaults to false.
 */
void SparseDenseMMAvx2(
    int M, int N,
    const int* row_ptr, const int* col_idx, const float* values,
    const float* B, int ldb,
    float* C, int ldc,
    bool accum = false);
|
|
|
|
|
#if defined(FBGEMM_FBCODE) || !defined(__aarch64__) |
|
|
/**
 * @brief Internal declaration with the same parameter list and default as
 *        the public SparseDenseMM.
 *
 * Only declared when FBGEMM_FBCODE is defined or the target is not aarch64
 * (see the enclosing #if).
 *
 * NOTE(review): by its name this is the AVX-512 implementation -- confirm
 * against the definition.
 *
 * @param accum defaults to false.
 */
void SparseDenseMMAvx512(
    int M, int N,
    const int* row_ptr, const int* col_idx, const float* values,
    const float* B, int ldb,
    float* C, int ldc,
    bool accum = false);
|
|
|
|
|
template <bool FUSE_RELU, QuantizationGranularity Q_GRAN> |
|
|
void SparseDenseInt8MMAvx2( |
|
|
int N, |
|
|
const std::unique_ptr<BCSRMatrix<>>& bcsr, |
|
|
const uint8_t* B, |
|
|
int ldb, |
|
|
int32_t* C_i32, |
|
|
uint8_t* C_u8, |
|
|
int ldc, |
|
|
trRequantizationParams_t& rParams, |
|
|
bool accum = false, |
|
|
int thread_id = 0, |
|
|
int num_threads = 1); |
|
|
|
|
|
template <bool FUSE_RELU, QuantizationGranularity Q_GRAN> |
|
|
void SparseDenseInt8MMAvx512( |
|
|
int N, |
|
|
const std::unique_ptr<BCSRMatrix<>>& bcsr, |
|
|
const uint8_t* B, |
|
|
int ldb, |
|
|
int32_t* C_i32, |
|
|
uint8_t* C_u8, |
|
|
int ldc, |
|
|
trRequantizationParams_t& rParams, |
|
|
bool accum = false, |
|
|
int thread_id = 0, |
|
|
int num_threads = 1); |
|
|
|
|
|
template <bool FUSE_RELU, QuantizationGranularity Q_GRAN> |
|
|
void SparseDenseInt8MVAvx512( |
|
|
const std::unique_ptr<BCSRMatrix<>>& bcsr, |
|
|
const uint8_t* B, |
|
|
int ldb, |
|
|
int32_t* C_i32, |
|
|
uint8_t* C_u8, |
|
|
trRequantizationParams_t& rParams, |
|
|
bool accum = false, |
|
|
int thread_id = 0, |
|
|
int num_threads = 1); |
|
|
#endif |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|