Add quantization_config for FP8
Browse files- config.json +6 -0
config.json
CHANGED
|
@@ -98,6 +98,12 @@
|
|
| 98 |
"topk_group": 1,
|
| 99 |
"topk_method": "noaux_tc",
|
| 100 |
"transformers_version": "5.0.0.dev0",
|
|
| 101 |
"use_cache": true,
|
| 102 |
"v_head_dim": 256,
|
| 103 |
"vocab_size": 154880
|
|
|
|
| 98 |
"topk_group": 1,
|
| 99 |
"topk_method": "noaux_tc",
|
| 100 |
"transformers_version": "5.0.0.dev0",
|
| 101 |
+  "quantization_config": {
|
| 102 |
+    "quant_method": "fp8",
|
| 103 |
+    "activation_scheme": "dynamic",
|
| 104 |
+    "is_checkpoint_fp8_serialized": true,
|
| 105 |
+    "weight_block_size": [128, 128]
|
| 106 |
+  },
|
|
| 107 |
"use_cache": true,
|
| 108 |
"v_head_dim": 256,
|
| 109 |
"vocab_size": 154880
|