Add quantization_config for FP8
Browse files- config.json +6 -0
config.json
CHANGED
|
@@ -98,6 +98,12 @@
|
|
| 98 |
"topk_group": 1,
|
| 99 |
"topk_method": "noaux_tc",
|
| 100 |
"transformers_version": "5.0.0.dev0",
|
|
| 101 |
"use_cache": true,
|
| 102 |
"v_head_dim": 256,
|
| 103 |
"vocab_size": 154880
|
|
|
|
| 98 |
"topk_group": 1,
|
| 99 |
"topk_method": "noaux_tc",
|
| 100 |
"transformers_version": "5.0.0.dev0",
|
| 101 |
+  "quantization_config": {
|
| 102 |
+    "quant_method": "fp8",
|
| 103 |
+    "activation_scheme": "dynamic",
|
| 104 |
+    "is_checkpoint_fp8_serialized": true,
|
| 105 |
+    "weight_block_size": [128, 128]
|
| 106 |
+  },
|
|
| 107 |
"use_cache": true,
|
| 108 |
"v_head_dim": 256,
|
| 109 |
"vocab_size": 154880
|