GadflyII committed on
Commit
6a18ef6
·
verified ·
1 Parent(s): 9dc7907

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +2 -10
README.md CHANGED
@@ -45,7 +45,7 @@ NVFP4 (4-bit floating point) quantized version of [zai-org/GLM-4.6V](https://hug
45
  ### Launch Command
46
 
47
  ```bash
48
- # Single GPU (96GB VRAM - full 128K context)
49
  python -m vllm.entrypoints.openai.api_server \
50
  --model GadflyII/GLM-4.6V-NVFP4 \
51
  --tensor-parallel-size 1 \
@@ -53,15 +53,7 @@ python -m vllm.entrypoints.openai.api_server \
53
  --max-model-len 131072 \
54
  --port 8000
55
 
56
- # Single GPU (80GB VRAM - reduced context)
57
- python -m vllm.entrypoints.openai.api_server \
58
- --model GadflyII/GLM-4.6V-NVFP4 \
59
- --tensor-parallel-size 1 \
60
- --trust-remote-code \
61
- --max-model-len 131072 \
62
- --port 8000
63
-
64
- # Two GPUs (for 48GB cards)
65
  python -m vllm.entrypoints.openai.api_server \
66
  --model GadflyII/GLM-4.6V-NVFP4 \
67
  --tensor-parallel-size 2 \
 
45
  ### Launch Command
46
 
47
  ```bash
48
+ # Single GPU (full 128K context)
49
  python -m vllm.entrypoints.openai.api_server \
50
  --model GadflyII/GLM-4.6V-NVFP4 \
51
  --tensor-parallel-size 1 \
 
53
  --max-model-len 131072 \
54
  --port 8000
55
 
56
+ # Two GPUs
 
 
 
 
 
 
 
 
57
  python -m vllm.entrypoints.openai.api_server \
58
  --model GadflyII/GLM-4.6V-NVFP4 \
59
  --tensor-parallel-size 2 \