{
  "quantization_method": "torchao_Float8WeightOnly",
  "weight_dtype": "float8_e4m3fn",
  "scale_dtype": "float32",
  "scale_granularity": "per_row",
  "activation_dtype": "bfloat16",
  "torchao_version": "0.16.0",
  "torch_version": "2.9.0+cu128",
  "source_model": "fishaudio/s2-pro",
  "total_params_B": 4.562,
  "fp8_linear_params_B": 4.048,
  "bf16_other_params_B": 0.514,
  "output_size_GB": 6.16,
  "key_format": {
    "<layer_name>": "float8_e4m3fn quantized weight",
    "<layer_name>.scale": "float32 per-row dequantization scale",
    "_buf.<name>": "bf16/fp32 buffer (freqs_cis, causal_mask, etc.)",
    "other": "bfloat16 (embeddings, norms, non-linear layers)"
  },
  "inference_requirements": {
    "torchao": ">= 0.8.0",
    "compute_capability": ">= 8.9 (RTX 4090 / 5090) for native FP8 matmuls"
  },
  "notes": "All nn.Linear weights are float8_e4m3fn. Activations are bfloat16 (weight-only quantization). codec.pth is unchanged bfloat16."
}