{
  "dtype": "float8_e4m3fn",
  "weight_scaling": "per_channel_absmax",
  "activation_scaling": "dynamic_per_tensor",
  "matmul_op": "torch._scaled_mm",
  "output_dtype": "bfloat16",
  "converted_layers": 417,
  "weight_gb_before": 32.074,
  "weight_gb_after": 8.65,
  "compression_ratio": 3.708,
  "quantizer": "streaming_cpu"
}