Update README.md
Browse files
README.md
CHANGED
|
@@ -45,7 +45,7 @@ NVFP4 (4-bit floating point) quantized version of [zai-org/GLM-4.6V](https://hug
|
|
| 45 |
### Launch Command
|
| 46 |
|
| 47 |
```bash
|
| 48 |
-
# Single GPU (
|
| 49 |
python -m vllm.entrypoints.openai.api_server \
|
| 50 |
--model GadflyII/GLM-4.6V-NVFP4 \
|
| 51 |
--tensor-parallel-size 1 \
|
|
@@ -53,15 +53,7 @@ python -m vllm.entrypoints.openai.api_server \
|
|
| 53 |
--max-model-len 131072 \
|
| 54 |
--port 8000
|
| 55 |
|
| 56 |
-
#
|
| 57 |
-
python -m vllm.entrypoints.openai.api_server \
|
| 58 |
-
--model GadflyII/GLM-4.6V-NVFP4 \
|
| 59 |
-
--tensor-parallel-size 1 \
|
| 60 |
-
--trust-remote-code \
|
| 61 |
-
--max-model-len 131072 \
|
| 62 |
-
--port 8000
|
| 63 |
-
|
| 64 |
-
# Two GPUs (for 48GB cards)
|
| 65 |
python -m vllm.entrypoints.openai.api_server \
|
| 66 |
--model GadflyII/GLM-4.6V-NVFP4 \
|
| 67 |
--tensor-parallel-size 2 \
|
|
|
|
| 45 |
### Launch Command
|
| 46 |
|
| 47 |
```bash
|
| 48 |
+
# Single GPU (full 128K context)
|
| 49 |
python -m vllm.entrypoints.openai.api_server \
|
| 50 |
--model GadflyII/GLM-4.6V-NVFP4 \
|
| 51 |
--tensor-parallel-size 1 \
|
|
|
|
| 53 |
--max-model-len 131072 \
|
| 54 |
--port 8000
|
| 55 |
|
| 56 |
+
# Two GPUs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
python -m vllm.entrypoints.openai.api_server \
|
| 58 |
--model GadflyII/GLM-4.6V-NVFP4 \
|
| 59 |
--tensor-parallel-size 2 \
|