afs

by Marc-Anthony - opened Aug 18, 2025

←

This PR is in draft mode

Files changed (4)

README.md (changed)

@@ -86,14 +86,14 @@ GLM-4.1V-9B-Thinking integrates the **Chain-of-Thought** reasoning mechanism, im
 For `SGLang` and `transformers`:
 ```bash
-pip install transformers>=4.57.1
-pip install sglang>=0.5.3
 ```
 For `vLLM`:
 ```bash
-pip install vllm>=0.10.2
 ```
 ### Quick Start with Transformers

 For `SGLang` and `transformers`:
 ```bash
+pip install -r https://raw.githubusercontent.com/zai-org/GLM-V/main/requirements.txt
 ```
 For `vLLM`:
 ```bash
+pip install -U vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
+pip install transformers-v4.55.0-GLM-4.5V-preview
 ```
 ### Quick Start with Transformers

config.json (changed)

@@ -3,16 +3,7 @@
     "Glm4vMoeForConditionalGeneration"
   ],
   "model_type": "glm4v_moe",
-  "image_start_token_id": 151339,
-  "image_end_token_id": 151340,
-  "video_start_token_id": 151341,
-  "video_end_token_id": 151342,
-  "image_token_id": 151363,
-  "video_token_id": 151364,
-  "tie_word_embeddings": false,
-  "transformers_version": "4.57.1",
   "text_config": {
-    "model_type": "glm4v_moe_text",
     "pad_token_id": 151329,
     "vocab_size": 151552,
     "eos_token_id": [
@@ -20,6 +11,9 @@
       151336,
       151338
     ],
     "head_dim": 128,
     "attention_bias": true,
     "attention_dropout": 0.0,
@@ -29,6 +23,7 @@
     "initializer_range": 0.02,
     "intermediate_size": 10944,
     "max_position_embeddings": 65536,
     "moe_intermediate_size": 1408,
     "n_group": 1,
     "n_routed_experts": 128,
@@ -40,7 +35,7 @@
     "num_key_value_heads": 8,
     "partial_rotary_factor": 0.5,
     "rms_norm_eps": 1e-05,
-    "dtype": "bfloat16",
     "rope_scaling": {
       "rope_type": "default",
       "mrope_section": [
@@ -55,8 +50,12 @@
     "use_cache": true,
     "use_qk_norm": false
   },
   "vision_config": {
-    "model_type": "glm4v_moe",
     "attention_bias": false,
     "attention_dropout": 0.0,
     "depth": 24,
@@ -66,6 +65,7 @@
     "in_channels": 3,
     "initializer_range": 0.02,
     "intermediate_size": 10944,
     "num_heads": 12,
     "out_hidden_size": 4096,
     "patch_size": 14,

     "Glm4vMoeForConditionalGeneration"
   ],
   "model_type": "glm4v_moe",
   "text_config": {
     "pad_token_id": 151329,
     "vocab_size": 151552,
     "eos_token_id": [
       151336,
       151338
     ],
+    "image_end_token_id": 151340,
+    "image_start_token_id": 151339,
+    "image_token_id": 151363,
     "head_dim": 128,
     "attention_bias": true,
     "attention_dropout": 0.0,
     "initializer_range": 0.02,
     "intermediate_size": 10944,
     "max_position_embeddings": 65536,
+    "model_type": "glm4v_moe_text",
     "moe_intermediate_size": 1408,
     "n_group": 1,
     "n_routed_experts": 128,
     "num_key_value_heads": 8,
     "partial_rotary_factor": 0.5,
     "rms_norm_eps": 1e-05,
+    "torch_dtype": "bfloat16",
     "rope_scaling": {
       "rope_type": "default",
       "mrope_section": [
     "use_cache": true,
     "use_qk_norm": false
   },
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.55.0.dev0",
+  "video_end_token_id": 151342,
+  "video_start_token_id": 151341,
+  "video_token_id": 151364,
   "vision_config": {
     "attention_bias": false,
     "attention_dropout": 0.0,
     "depth": 24,
     "in_channels": 3,
     "initializer_range": 0.02,
     "intermediate_size": 10944,
+    "model_type": "glm4v_moe",
     "num_heads": 12,
     "out_hidden_size": 4096,
     "patch_size": 14,

generation_config.json (changed)

@@ -10,5 +10,5 @@
   "temperature": 1.0,
   "top_k": 1,
   "top_p": 0.0001,
-  "transformers_version": "4.57.1"
 }

   "temperature": 1.0,
   "top_k": 1,
   "top_p": 0.0001,
+  "transformers_version": "4.55.0.dev"
 }

tokenizer_config.json (changed)

@@ -306,8 +306,6 @@
     "<|end_of_video|>",
     "<|begin_of_audio|>",
     "<|end_of_audio|>",
-    "<|image|>",
-    "<|video|>",
     "<|begin_of_transcription|>",
     "<|end_of_transcription|>",
     "<|code_prefix|>",

     "<|end_of_video|>",
     "<|begin_of_audio|>",
     "<|end_of_audio|>",
     "<|begin_of_transcription|>",
     "<|end_of_transcription|>",
     "<|code_prefix|>",