File size: 575 Bytes
91865ac
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
{
  "quantization_method": "fp8_e4m3_per_channel_dynamic_act_rowwise",
  "weight_dtype": "float8_e4m3fn",
  "weight_scale_shape": "(out_features,)",
  "weight_scale_dtype": "float32",
  "activation_dtype": "float8_e4m3fn",
  "activation_scale": "dynamic_per_row",
  "skip_patterns": [
    "final_layer.linear"
  ],
  "compute_dtype": "bfloat16",
  "description": "Per-output-channel symmetric FP8 (e4m3) weight quantization with dynamic per-row activation quantization. Matmul via torch._scaled_mm (RowWise) on Hopper. Layers matching `skip_patterns` are kept in bfloat16."
}