File size: 2,348 Bytes
5612054
cc3c56d
53b6f17
 
 
 
 
 
cc3c56d
55913e8
 
 
 
53b6f17
e2b0585
53b6f17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/bin/bash
# Launcher for the StarVector vLLM + Gradio serving stack:
# optional SSH tunnels -> controller -> vLLM servers -> model workers -> web UI.
set -eo pipefail

# Configuration shared by every vLLM server and model worker started below.
readonly MODEL_PATH="/home/agent_h/data/starvector-1b-im2svg"
readonly CHAT_TEMPLATE="configs/chat-template.jinja"
readonly MAX_MODEL_LEN=8192

# SSH tunnels (optional): start each tunnel in the background, but only when
# its command is actually set — the original unconditionally spawned a shell
# per variable, which fails noisily (or breaks under `set -u`) when unset.
for tunnel_cmd in "${SSH_TUNNEL_CMD_1:-}" "${SSH_TUNNEL_CMD_2:-}" \
                  "${SSH_TUNNEL_CMD_3:-}" "${SSH_TUNNEL_CMD_4:-}"; do
  if [[ -n "$tunnel_cmd" ]]; then
    bash -c "$tunnel_cmd" &
  fi
done

# Controller: central registry that the model workers register with.
python -m starvector.serve.vllm_api_gradio.controller --host 0.0.0.0 --port 10000 &

# Poll the controller's TCP port (bash /dev/tcp, up to ~15s) instead of a
# blind `sleep 5`, so startup continues as soon as it is actually listening.
for _ in {1..30}; do
  if (exec 3<>/dev/tcp/localhost/10000) 2>/dev/null; then
    break
  fi
  sleep 0.5
done

# ====== Start the vLLM servers, one per backend port ======
echo "Iniciando servidores vLLM..."

# One server per port; the model workers below attach to the same ports.
VLLM_PORTS=(8000 8001 8002 8003)

for port in "${VLLM_PORTS[@]}"; do
  vllm serve "$MODEL_PATH" \
    --chat-template "$CHAT_TEMPLATE" \
    --trust-remote-code \
    --port "$port" \
    --max-model-len "$MAX_MODEL_LEN" &
done

# Poll each server's TCP port instead of a fixed `sleep 180`: continue as
# soon as every server accepts connections, with a 10-minute overall
# deadline as a safety net.
# NOTE(review): assumes vLLM does not bind its API port until the engine is
# ready to serve — confirm for this vLLM version.
echo "Esperando a que vLLM cargue el modelo (esto puede tardar 2-3 minutos)..."
deadline=$((SECONDS + 600))
for port in "${VLLM_PORTS[@]}"; do
  until (exec 3<>"/dev/tcp/localhost/${port}") 2>/dev/null; do
    if (( SECONDS >= deadline )); then
      break
    fi
    sleep 3
  done
done

# Model workers: one per vLLM server. Worker i listens on 4000i and proxies
# to the vLLM backend on 800i, registering itself with the controller.
for i in 0 1 2 3; do
  worker_port=$((40000 + i))
  vllm_port=$((8000 + i))
  python -m starvector.serve.vllm_api_gradio.model_worker \
    --host 0.0.0.0 \
    --controller http://localhost:10000 \
    --port "$worker_port" \
    --worker "http://localhost:${worker_port}" \
    --model-name "$MODEL_PATH" \
    --vllm-base-url "http://localhost:${vllm_port}" &
done

# Gradio web server: runs in the foreground and keeps the script (and all
# backgrounded services above) alive.
python -m starvector.serve.vllm_api_gradio.gradio_web_server \
  --controller http://localhost:10000 \
  --model-list-mode reload \
  --port 7860