Upload deploy_medguide_v.sh with huggingface_hub
Browse files- deploy_medguide_v.sh +14 -0
deploy_medguide_v.sh
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Start a vLLM server that exposes a model under the name "medguide-v".
#
# Usage:
#   MODEL_PATH=/path/to/checkpoint sh deploy_medguide_v.sh
#
# Environment:
#   MODEL_PATH  Model directory or hub identifier passed to `vllm serve`.
#               Required; the script aborts if it is empty. It may also be
#               hard-coded by editing the assignment below.
#
# The server listens on 0.0.0.0:8231 and is sharded across 8 GPUs.

# Abort on the first failing command and on any use of an unset variable.
set -eu

# Expose GPUs 0-7 to the process; the count must match
# --tensor-parallel-size below.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
# Needed so vLLM accepts the --max-model-len given below when it is larger
# than the length derived from the model config (see the vLLM environment
# variable docs) -- TODO confirm it is still required for this checkpoint.
export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
# Value 0 presumably pins the legacy (V0) engine -- confirm against the
# installed vLLM version.
export VLLM_USE_V1=0

# Take the model location from the caller's environment. An empty value is
# rejected here: expanded into the command line it would simply disappear,
# and vllm would never receive the intended model.
MODEL_PATH="${MODEL_PATH:-}"
if [ -z "$MODEL_PATH" ]; then
  printf '%s\n' 'error: MODEL_PATH is empty; set it to the model directory or hub id' >&2
  exit 1
fi

# NOTE(review): the API key is hard-coded and the server binds to all
# interfaces; consider supplying the key from the environment instead.
# The path is quoted so that whitespace or glob characters in it are passed
# through as a single argument.
vllm serve "$MODEL_PATH" \
  --served-model-name medguide-v \
  --port 8231 \
  --host 0.0.0.0 \
  --dtype bfloat16 \
  --limit-mm-per-prompt image=6,video=0 \
  --max-model-len 12000 \
  --gpu-memory-utilization 0.9 \
  --tensor-parallel-size 8 \
  --api-key medguide-v
|