RedHatAI
/

GLM-4.6-NVFP4

Text Generation

8-bit precision

compressed-tensors

Model card · Files and versions

nm-research committed on Feb 27

Commit

271c2d2

·

verified ·

1 Parent(s): 1ed4d35

Update README.md

Files changed (1) hide show

README.md +3 -3

README.md CHANGED Viewed

@@ -217,7 +217,7 @@ The results were obtained using the following commands:
 ```
 lm_eval --model local-chat-completions \
   --tasks mmlu_pro  \
-  --model_args "model=RedHatAI/glm-4.6-FP8-NVFP4,max_length=90000,base_url=http://0.0.0.0:3758/v1/chat/completions,num_concurrent=128,max_retries=3,tokenized_requests=False,tokenizer_backend=None,timeout=1200" \
   --num_fewshot 5 \
   --apply_chat_template \
   --fewshot_as_multiturn \
@@ -228,7 +228,7 @@ lm_eval --model local-chat-completions \
 lm_eval --model local-chat-completions \
   --tasks leaderboard_ifeval  \
-  --model_args "model=RedHatAI/glm-4.6-FP8-NVFP4,max_length=90000,base_url=http://0.0.0.0:3758/v1/chat/completions,num_concurrent=128,max_retries=3,tokenized_requests=False,tokenizer_backend=None,timeout=1200" \
   --num_fewshot 5 \
   --apply_chat_template \
   --fewshot_as_multiturn \
@@ -244,7 +244,7 @@ litellm_config.yaml:
 model_parameters:
   provider: "hosted_vllm"
-  model_name: "hosted_vllm/zai-org-glm-4.6-fp8"
   base_url: "http://0.0.0.0:3759/v1"
   api_key: ""
   timeout: 3600

 ```
 lm_eval --model local-chat-completions \
   --tasks mmlu_pro  \
+  --model_args "model=RedHatAI/GLM-4.6-NVFP4,max_length=90000,base_url=http://0.0.0.0:3758/v1/chat/completions,num_concurrent=128,max_retries=3,tokenized_requests=False,tokenizer_backend=None,timeout=1200" \
   --num_fewshot 5 \
   --apply_chat_template \
   --fewshot_as_multiturn \
 lm_eval --model local-chat-completions \
   --tasks leaderboard_ifeval  \
+  --model_args "model=RedHatAI/GLM-4.6-NVFP4,max_length=90000,base_url=http://0.0.0.0:3758/v1/chat/completions,num_concurrent=128,max_retries=3,tokenized_requests=False,tokenizer_backend=None,timeout=1200" \
   --num_fewshot 5 \
   --apply_chat_template \
   --fewshot_as_multiturn \
 model_parameters:
   provider: "hosted_vllm"
+  model_name: "hosted_vllm/redhatai-glm-4.6-nvfp4"
   base_url: "http://0.0.0.0:3759/v1"
   api_key: ""
   timeout: 3600