Spaces:
Sleeping
Sleeping
I now have more GPU available, so I have reduced the GPU memory utilisation to 0.8
Browse files- vllm_server.py +1 -1
vllm_server.py
CHANGED
|
@@ -34,7 +34,7 @@ def start_vllm():
|
|
| 34 |
"--model", model_name,
|
| 35 |
"--port", "8000",
|
| 36 |
"--host", "0.0.0.0",
|
| 37 |
-
"--gpu-memory-utilization", "0.
|
| 38 |
"--max-model-len", "4096",
|
| 39 |
"--max-num-seqs", "8",
|
| 40 |
"--swap-space", "4",
|
|
|
|
| 34 |
"--model", model_name,
|
| 35 |
"--port", "8000",
|
| 36 |
"--host", "0.0.0.0",
|
| 37 |
+
"--gpu-memory-utilization", "0.8",
|
| 38 |
"--max-model-len", "4096",
|
| 39 |
"--max-num-seqs", "8",
|
| 40 |
"--swap-space", "4",
|