chaechae7's picture
Upload transformer-only FP8 quantized runtime (scaled fp8)
a8eeee1 verified
{
"text_encoder": {
"mode": "copied_unchanged"
},
"transformer": {
"num_shards": 7,
"num_output_entries": 534,
"num_fp8_quantized": 203,
"num_weight_scales_added": 203,
"num_non_fp8_copied": 128,
"total_size": 32231164544
}
}