Alexandre-Numind committed on
Commit
046b3ac
·
verified ·
1 Parent(s): 9d6b0ef

Update start.sh

Browse files
Files changed (1) hide show
  1. start.sh +11 -5
start.sh CHANGED
@@ -2,8 +2,9 @@
2
  set -euo pipefail
3
 
4
  MODEL_NAME="${MODEL_NAME:-NM-dev/NuExtract3.4_4B-RL-400}"
5
- VLLM_PORT="${VLLM_PORT:-8000}"
6
  GRADIO_PORT="${GRADIO_SERVER_PORT:-7860}"
 
7
 
8
  echo "Starting vLLM with model: ${MODEL_NAME}"
9
 
@@ -13,18 +14,23 @@ vllm serve "${MODEL_NAME}" \
13
  --port "${VLLM_PORT}" \
14
  --trust-remote-code \
15
  --dtype auto \
16
- --max-model-len "${MAX_MODEL_LEN:-12000}" \
17
  --gpu-memory-utilization "${GPU_MEMORY_UTILIZATION:-0.90}" \
18
- --api-key "${OPENAI_API_KEY:-EMPTY}" &
 
19
 
20
  VLLM_PID=$!
21
 
22
  echo "Waiting for vLLM to become ready..."
23
- until curl -sf "http://127.0.0.1:${VLLM_PORT}/v1/models" >/dev/null; do
 
 
 
24
  if ! kill -0 "${VLLM_PID}" 2>/dev/null; then
25
  echo "vLLM exited before becoming ready."
26
  exit 1
27
  fi
 
28
  sleep 2
29
  done
30
 
@@ -33,6 +39,6 @@ echo "vLLM is ready. Starting Gradio..."
33
  python /home/user/app/app.py \
34
  --model-name "${MODEL_NAME}" \
35
  --api-base "http://127.0.0.1:${VLLM_PORT}/v1" \
36
- --api-key "${OPENAI_API_KEY:-EMPTY}" \
37
  --server-name "0.0.0.0" \
38
  --server-port "${GRADIO_PORT}"
 
2
  set -euo pipefail
3
 
4
  MODEL_NAME="${MODEL_NAME:-NM-dev/NuExtract3.4_4B-RL-400}"
5
+ VLLM_PORT="${VLLM_PORT:-8001}"
6
  GRADIO_PORT="${GRADIO_SERVER_PORT:-7860}"
7
+ API_KEY="${OPENAI_API_KEY:-EMPTY}"
8
 
9
  echo "Starting vLLM with model: ${MODEL_NAME}"
10
 
 
14
  --port "${VLLM_PORT}" \
15
  --trust-remote-code \
16
  --dtype auto \
17
+ --max-model-len "${MAX_MODEL_LEN:-8192}" \
18
  --gpu-memory-utilization "${GPU_MEMORY_UTILIZATION:-0.90}" \
19
+ --limit-mm-per-prompt image=1 \
20
+ --api-key "${API_KEY}" &
21
 
22
  VLLM_PID=$!
23
 
24
  echo "Waiting for vLLM to become ready..."
25
+ until curl -sf \
26
+ -H "Authorization: Bearer ${API_KEY}" \
27
+ "http://127.0.0.1:${VLLM_PORT}/v1/models" >/dev/null; do
28
+
29
  if ! kill -0 "${VLLM_PID}" 2>/dev/null; then
30
  echo "vLLM exited before becoming ready."
31
  exit 1
32
  fi
33
+
34
  sleep 2
35
  done
36
 
 
39
  python /home/user/app/app.py \
40
  --model-name "${MODEL_NAME}" \
41
  --api-base "http://127.0.0.1:${VLLM_PORT}/v1" \
42
+ --api-key "${API_KEY}" \
43
  --server-name "0.0.0.0" \
44
  --server-port "${GRADIO_PORT}"