File size: 1,092 Bytes
8595613
 
 
 
046b3ac
8595613
046b3ac
8595613
 
 
6b5f9db
8595613
 
 
 
 
046b3ac
8595613
046b3ac
8595613
 
 
 
046b3ac
 
 
 
8595613
 
 
 
046b3ac
8595613
 
 
 
 
5d767ea
8595613
 
046b3ac
8595613
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/usr/bin/env bash
#
# Starts a vLLM server in the background on the loopback interface, polls its
# /v1/models endpoint until it responds, then runs the Gradio app in the
# foreground pointed at that server.
#
# Environment overrides (default in parentheses):
#   MODEL_NAME              model passed to `vllm serve` and the app
#                           (NM-dev/NuExtract3.4_4B-RL-400)
#   VLLM_PORT               port vLLM listens on at 127.0.0.1 (8001)
#   GRADIO_SERVER_PORT      port passed to the app as --server-port (7860)
#   OPENAI_API_KEY          key given to vLLM, the readiness probe and the app
#                           (EMPTY)
#   MAX_MODEL_LEN           value for --max-model-len (8192)
#   GPU_MEMORY_UTILIZATION  value for --gpu-memory-utilization (0.90)
set -euo pipefail

MODEL_NAME="${MODEL_NAME:-NM-dev/NuExtract3.4_4B-RL-400}"
VLLM_PORT="${VLLM_PORT:-8001}"
GRADIO_PORT="${GRADIO_SERVER_PORT:-7860}"
API_KEY="${OPENAI_API_KEY:-EMPTY}"

# Set once vLLM is launched; empty means there is nothing to clean up yet.
VLLM_PID=""

# Stops the background vLLM process (if it is still running) whenever this
# script exits, so a failed startup or a Gradio exit does not leave an
# orphaned server behind. Every step is guarded so that cleanup itself can
# never abort under `set -e`.
cleanup() {
  if [[ -n "${VLLM_PID}" ]] && kill -0 "${VLLM_PID}" 2>/dev/null; then
    # The process may disappear between the check and the kill; that is fine.
    kill "${VLLM_PID}" 2>/dev/null || true
    # Reap the child; its exit status is irrelevant at this point.
    wait "${VLLM_PID}" 2>/dev/null || true
  fi
}
trap cleanup EXIT

echo "Starting vLLM with model: ${MODEL_NAME}"

# NOTE(review): the API key is passed on the command line here and to the app
# below, so it is visible in process listings — consider an env/file-based
# mechanism if a real secret is ever used.
vllm serve "${MODEL_NAME}" \
  --served-model-name "${MODEL_NAME}" \
  --host 127.0.0.1 \
  --port "${VLLM_PORT}" \
  --trust-remote-code \
  --dtype auto \
  --max-model-len "${MAX_MODEL_LEN:-8192}" \
  --gpu-memory-utilization "${GPU_MEMORY_UTILIZATION:-0.90}" \
  --api-key "${API_KEY}" &

VLLM_PID=$!

echo "Waiting for vLLM to become ready..."
# Poll every 2 seconds until the models endpoint returns a success status
# (curl -f turns HTTP errors into a non-zero exit).
until curl -sf \
  -H "Authorization: Bearer ${API_KEY}" \
  "http://127.0.0.1:${VLLM_PORT}/v1/models" >/dev/null; do

  # A background job failing does not trip `set -e`, so check liveness
  # explicitly instead of polling a dead server forever.
  if ! kill -0 "${VLLM_PID}" 2>/dev/null; then
    echo "vLLM exited before becoming ready." >&2
    exit 1
  fi

  sleep 2
done

echo "vLLM is ready. Starting Gradio..."

# Runs in the foreground (not exec'd) so the EXIT trap still fires and tears
# down vLLM when the app terminates; the script's exit status is the app's.
python3 /home/user/app/app.py \
  --model-name "${MODEL_NAME}" \
  --api-base "http://127.0.0.1:${VLLM_PORT}/v1" \
  --api-key "${API_KEY}" \
  --server-name "0.0.0.0" \
  --server-port "${GRADIO_PORT}"