File size: 2,379 Bytes
d1d4335
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once
// This file defines common utilities used in code compiled with avx2/avx512
// flags.

#include <cstddef>
#include <cstdint>
#include <string>

namespace fbgemm {

/**
 * @brief Enumerates the supported quantization granularities.
 *
 * NOTE(review): the per-enumerator notes below are inferred from the names
 * only; confirm them against the code that consumes this enum.
 */
enum class FBGEMM_ENUM_CLASS_API QuantizationGranularity {
  // Presumably a single set of quantization parameters for the whole tensor.
  TENSOR,
  // Presumably one set of quantization parameters per group.
  GROUP,
  // Presumably one set of quantization parameters per output channel.
  OUT_CHANNEL,
};

/**
 * @brief Describes a block of a matrix by its starting row and column and by
 * its size along each of the two dimensions.
 */
struct FBGEMM_API block_type_t {
  int row_start;
  int row_size;
  int col_start;
  int col_size;

  /**
   * @brief Render the four fields as one comma-separated, human-readable
   * string of the form
   * "row start:R, row size:S, col start:C, col size:T".
   */
  std::string toString() const {
    return "row start:" + std::to_string(row_start) + ", " +
        "row size:" + std::to_string(row_size) + ", " +
        "col start:" + std::to_string(col_start) + ", " +
        "col size:" + std::to_string(col_size);
  }
};

/**
 * @brief A struct to represent all the requantization parameters.
 *
 * Please note that this is different from RequantizationParams in
 * QuantUtilsAvx2.h as it combines all the parameters needed for various
 * quantization granularities.
 *
 * The struct only stores raw pointers; nothing in this header shows who owns
 * the pointed-to arrays or how long they must live.
 *
 * @tparam BIAS_TYPE Element type of the bias array (std::int32_t by default).
 */
template <typename BIAS_TYPE = std::int32_t>
struct requantizationParams_t {
  // Exposes the bias element type to users of the struct.
  using BIAS_T = BIAS_TYPE;
  // Scalar zero point for A (presumably the activation matrix; confirm).
  std::int32_t A_zero_point;
  // Pointer to the zero point(s) for B. NOTE(review): the number of elements
  // presumably depends on the quantization granularity; confirm with callers.
  const std::int32_t* B_zero_point;
  // Scalar zero point for C (presumably the output matrix; confirm).
  std::int32_t C_zero_point;
  // Pointer to the float multiplier(s) for C; element count not visible here.
  const float* C_multiplier;
  // Pointers to 32-bit row and column offsets.
  const std::int32_t* row_offsets;
  const std::int32_t* col_offsets;
  // Pointer to bias values of type BIAS_T.
  const BIAS_T* bias;
  // Column count (presumably of the output; confirm).
  std::uint32_t ncols;
  // Number of groups.
  int groups;
  // NOTE(review): looks like activation scale times weight scale; confirm.
  const float* act_times_w_scale;
};

/**
 * @brief A struct to represent all the parameters for requantizing for floats.
 *
 * The struct only stores raw pointers; nothing in this header shows who owns
 * the pointed-to arrays or how long they must live.
 */
struct requantizationForFloatParams_t {
  // Scalar zero point for A (presumably the activation matrix; confirm).
  std::int32_t A_zero_point;
  // Pointer to the zero point(s) for B; element count not visible here.
  const std::int32_t* B_zero_point;
  // Scalar float scale for A.
  float A_scale;
  // Pointer to the float scale(s) for B; element count not visible here.
  const float* B_scale;
  // Pointers to 32-bit row and column offsets.
  const std::int32_t* row_offsets;
  const std::int32_t* col_offsets;
  // Pointer to float bias values.
  const float* bias;
  // Column count (presumably of the output; confirm).
  std::uint32_t ncols;
  // Number of groups.
  int groups;
};

/**
 * @brief Allocate size bytes of uninitialized storage whose alignment is
 * specified by align.
 *
 * @param align Requested alignment of the returned storage.
 * @param size Number of bytes to allocate.
 * @param raiseException Defaults to false. NOTE(review): presumably selects
 *        throwing on allocation failure instead of returning a null pointer;
 *        the definition is not in this header, so confirm there.
 * @return Pointer to the allocated storage; release it with
 *         fbgemmAlignedFree.
 */
// std::size_t (from <cstddef>) is spelled out so the declaration does not
// depend on another header happening to expose the unqualified name.
FBGEMM_API void* fbgemmAlignedAlloc(
    std::size_t align,
    std::size_t size,
    bool raiseException = false);

/**
 * @brief Free memory allocated by fbgemmAlignedAlloc
 *
 * @param p Pointer to the storage to release. NOTE(review): whether a null
 *        pointer is accepted is not visible from this declaration; confirm
 *        against the definition.
 */
FBGEMM_API void fbgemmAlignedFree(void* p);

} // namespace fbgemm