{
  "quant_method": "channel_wise_int8",
  "bits": 8,
  "format": "per_output_channel_scale",
  "weight_dtype": "I8",
  "scale_dtype": "BF16",
  "description": "Each weight tensor 'foo' is stored as I8 values in 'foo' with a BF16 per-output-channel scale in 'foo_scale'. Dequantize: weight_bf16 = weight_i8 * scale_bf16."
}