#!/usr/bin/env bash
#
# Launch a vLLM OpenAI-compatible server in the background, wait until it
# answers on /v1/models, then run the Gradio app in the foreground.
#
# Environment variables (all optional):
#   MODEL_NAME              model passed to `vllm serve`, also used as the
#                           served model name
#                           (default: NM-dev/NuExtract3.4_4B-RL-400)
#   VLLM_PORT               loopback port vLLM listens on (default: 8001)
#   GRADIO_SERVER_PORT      port handed to app.py as --server-port
#                           (default: 7860)
#   OPENAI_API_KEY          API key shared by vLLM and the app (default: EMPTY)
#   MAX_MODEL_LEN           value for --max-model-len (default: 8192)
#   GPU_MEMORY_UTILIZATION  value for --gpu-memory-utilization (default: 0.90)
#
# Exit status: 1 if vLLM dies before becoming ready; otherwise the exit
# status of the Gradio app.

set -euo pipefail

MODEL_NAME="${MODEL_NAME:-NM-dev/NuExtract3.4_4B-RL-400}"
VLLM_PORT="${VLLM_PORT:-8001}"
GRADIO_PORT="${GRADIO_SERVER_PORT:-7860}"
API_KEY="${OPENAI_API_KEY:-EMPTY}"
VLLM_PID=""

#######################################
# Stop the background vLLM server, if it is still running, and reap it.
# Installed as the EXIT trap so vLLM is not left orphaned when the Gradio
# app exits or the script aborts. Never calls `exit`, so the script's own
# exit status is preserved.
# Globals:   VLLM_PID (read)
#######################################
cleanup() {
  if [[ -n "${VLLM_PID}" ]] && kill -0 "${VLLM_PID}" 2>/dev/null; then
    kill "${VLLM_PID}" 2>/dev/null || true
  fi
  if [[ -n "${VLLM_PID}" ]]; then
    # Reap the child; its status is irrelevant at this point.
    wait "${VLLM_PID}" 2>/dev/null || true
  fi
}
trap cleanup EXIT

echo "Starting vLLM with model: ${MODEL_NAME}"

# vLLM is bound to 127.0.0.1: only processes on this host (the Gradio app
# below) talk to it directly.
# NOTE(review): the API key is passed on the command line, so it may be
# visible in process listings — confirm this is acceptable for the deployment.
vllm serve "${MODEL_NAME}" \
  --served-model-name "${MODEL_NAME}" \
  --host 127.0.0.1 \
  --port "${VLLM_PORT}" \
  --trust-remote-code \
  --dtype auto \
  --max-model-len "${MAX_MODEL_LEN:-8192}" \
  --gpu-memory-utilization "${GPU_MEMORY_UTILIZATION:-0.90}" \
  --api-key "${API_KEY}" &
VLLM_PID=$!

echo "Waiting for vLLM to become ready..."

# Poll the models endpoint every 2 seconds. --max-time bounds each probe so
# a connection that is accepted but never answered cannot stall the loop and
# skip the liveness check below.
until curl -sf --max-time 5 \
  -H "Authorization: Bearer ${API_KEY}" \
  "http://127.0.0.1:${VLLM_PORT}/v1/models" >/dev/null; do
  # Give up as soon as the server process is gone instead of polling forever.
  if ! kill -0 "${VLLM_PID}" 2>/dev/null; then
    echo "vLLM exited before becoming ready." >&2
    exit 1
  fi
  sleep 2
done

echo "vLLM is ready. Starting Gradio..."

# Runs in the foreground; its exit status becomes the script's exit status,
# and the EXIT trap then shuts vLLM down.
python3 /home/user/app/app.py \
  --model-name "${MODEL_NAME}" \
  --api-base "http://127.0.0.1:${VLLM_PORT}/v1" \
  --api-key "${API_KEY}" \
  --server-name "0.0.0.0" \
  --server-port "${GRADIO_PORT}"