{
  "dtype": "float8_e4m3fn",
  "weight_scaling": "per_channel_absmax",
  "activation_scaling": "dynamic_per_tensor",
  "matmul_op": "torch._scaled_mm",
  "output_dtype": "bfloat16",
  "converted_layers": 417,
  "weight_gb_before": 32.074,
  "weight_gb_after": 8.65,
  "compression_ratio": 3.708,
  "quantizer": "streaming_cpu"
}