Spaces:
Running on A100
Running on A100
File size: 1,092 Bytes
#!/usr/bin/env bash
#
# Entrypoint: start a vLLM server in the background, poll its /v1/models
# endpoint until it answers, then run the Gradio app in the foreground
# against that server.
#
# Optional environment overrides:
#   MODEL_NAME              model passed to `vllm serve` and to app.py
#   VLLM_PORT               loopback port vLLM listens on (default 8001)
#   GRADIO_SERVER_PORT      port handed to app.py (default 7860)
#   OPENAI_API_KEY          API key for vLLM and app.py (default EMPTY)
#   MAX_MODEL_LEN           value for --max-model-len (default 8192)
#   GPU_MEMORY_UTILIZATION  value for --gpu-memory-utilization (default 0.90)
#   VLLM_STARTUP_TIMEOUT    max whole seconds to wait for readiness;
#                           0 waits indefinitely (default 0)
set -euo pipefail

MODEL_NAME="${MODEL_NAME:-NM-dev/NuExtract3.4_4B-RL-400}"
VLLM_PORT="${VLLM_PORT:-8001}"
GRADIO_PORT="${GRADIO_SERVER_PORT:-7860}"
API_KEY="${OPENAI_API_KEY:-EMPTY}"
STARTUP_TIMEOUT="${VLLM_STARTUP_TIMEOUT:-0}"

# Upper bound for a single readiness probe so one stalled connection cannot
# block the polling loop.
readonly PROBE_MAX_TIME=5

if [[ ! "${STARTUP_TIMEOUT}" =~ ^[0-9]+$ ]]; then
  printf 'VLLM_STARTUP_TIMEOUT must be a non-negative integer, got: %s\n' \
    "${STARTUP_TIMEOUT}" >&2
  exit 2
fi
# Force base 10 so a value such as "08" is not parsed as invalid octal.
startup_limit=$(( 10#${STARTUP_TIMEOUT} ))

VLLM_PID=""

# Stop the background vLLM server (if it is still alive) when the script
# exits, so it is not left running after Gradio stops or startup fails.
cleanup() {
  if [[ -n "${VLLM_PID}" ]] && kill -0 "${VLLM_PID}" 2>/dev/null; then
    kill "${VLLM_PID}" 2>/dev/null || true
    # Reap the child; its exit status is irrelevant during teardown.
    wait "${VLLM_PID}" 2>/dev/null || true
  fi
}
trap cleanup EXIT

echo "Starting vLLM with model: ${MODEL_NAME}"
# NOTE(review): the API key is passed on the command line here and to app.py,
# so it is visible in the process list.
vllm serve "${MODEL_NAME}" \
  --served-model-name "${MODEL_NAME}" \
  --host 127.0.0.1 \
  --port "${VLLM_PORT}" \
  --trust-remote-code \
  --dtype auto \
  --max-model-len "${MAX_MODEL_LEN:-8192}" \
  --gpu-memory-utilization "${GPU_MEMORY_UTILIZATION:-0.90}" \
  --api-key "${API_KEY}" &
VLLM_PID=$!

echo "Waiting for vLLM to become ready..."
SECONDS=0 # bash's built-in elapsed-seconds counter, restarted for the wait
until curl -sf --max-time "${PROBE_MAX_TIME}" \
  -H "Authorization: Bearer ${API_KEY}" \
  "http://127.0.0.1:${VLLM_PORT}/v1/models" >/dev/null; do
  # Stop polling as soon as the server process itself is gone.
  if ! kill -0 "${VLLM_PID}" 2>/dev/null; then
    echo "vLLM exited before becoming ready." >&2
    exit 1
  fi
  if (( startup_limit > 0 && SECONDS >= startup_limit )); then
    printf 'Timed out after %ds waiting for vLLM to become ready.\n' \
      "${startup_limit}" >&2
    exit 1
  fi
  sleep 2
done

echo "vLLM is ready. Starting Gradio..."
# Runs in the foreground; its exit status becomes the script's exit status,
# and the EXIT trap then stops vLLM.
python3 /home/user/app/app.py \
  --model-name "${MODEL_NAME}" \
  --api-base "http://127.0.0.1:${VLLM_PORT}/v1" \
  --api-key "${API_KEY}" \
  --server-name "0.0.0.0" \
  --server-port "${GRADIO_PORT}"