{ "dtype": "float8_e4m3fn", "weight_scaling": "per_channel_absmax", "activation_scaling": "dynamic_per_tensor", "matmul_op": "torch._scaled_mm", "output_dtype": "bfloat16", "converted_layers": 417, "weight_gb_before": 32.074, "weight_gb_after": 8.65, "compression_ratio": 3.708, "quantizer": "streaming_cpu" }