Upload quantization_info.json with huggingface_hub
Browse files- quantization_info.json +21 -0
quantization_info.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"quantization_method": "per_row_symmetric_fp8",
|
| 3 |
+
"weight_dtype": "float8_e4m3fn",
|
| 4 |
+
"scale_dtype": "float32",
|
| 5 |
+
"scale_granularity": "per_row",
|
| 6 |
+
"activation_dtype": "bfloat16",
|
| 7 |
+
"torch_version": "2.10.0+cu128",
|
| 8 |
+
"source_model": "fishaudio/s2-pro",
|
| 9 |
+
"total_params_B": 4.562,
|
| 10 |
+
"fp8_linear_params_B": 4.048,
|
| 11 |
+
"bf16_other_params_B": 0.514,
|
| 12 |
+
"output_size_GB": 4.73,
|
| 13 |
+
"linear_layers_quantized": 201,
|
| 14 |
+
"key_format": {
|
| 15 |
+
"<layer_name>": "float8_e4m3fn quantized weight",
|
| 16 |
+
"<layer_name>.scale": "float32 per-row dequantization scale",
|
| 17 |
+
"_buf.<name>": "bf16/fp32 buffer (freqs_cis, causal_mask, etc.)",
|
| 18 |
+
"other": "bfloat16 (embeddings, norms, non-linear layers)"
|
| 19 |
+
},
|
| 20 |
+
"notes": "All nn.Linear weights are float8_e4m3fn with per-row scales. Activations are bfloat16 (weight-only quantization). codec.pth is unchanged bfloat16. Created by AEmotionStudio/ComfyUI-FFMPEGA."
|
| 21 |
+
}
|