AEmotionStudio committed on
Commit
264ed96
·
verified ·
1 Parent(s): 6b976bd

Upload quantization_info.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. quantization_info.json +21 -0
quantization_info.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{
  "quantization_method": "per_row_symmetric_fp8",
  "weight_dtype": "float8_e4m3fn",
  "scale_dtype": "float32",
  "scale_granularity": "per_row",
  "activation_dtype": "bfloat16",
  "torch_version": "2.10.0+cu128",
  "source_model": "fishaudio/s2-pro",
  "total_params_B": 4.562,
  "fp8_linear_params_B": 4.048,
  "bf16_other_params_B": 0.514,
  "output_size_GB": 4.73,
  "linear_layers_quantized": 201,
  "key_format": {
    "<layer_name>": "float8_e4m3fn quantized weight",
    "<layer_name>.scale": "float32 per-row dequantization scale",
    "_buf.<name>": "bf16/fp32 buffer (freqs_cis, causal_mask, etc.)",
    "other": "bfloat16 (embeddings, norms, non-linear layers)"
  },
  "notes": "All nn.Linear weights are float8_e4m3fn with per-row scales. Activations are bfloat16 (weight-only quantization). codec.pth is unchanged bfloat16. Created by AEmotionStudio/ComfyUI-FFMPEGA."
}