Spaces:
Running on A100
# Entrypoint: start a vLLM OpenAI-compatible server in the background, wait
# until it answers on /v1/models, then start the Gradio front end that talks
# to it. Both services are supervised: when either one exits, or the script
# is signalled, the other is stopped as well.
#
# Environment (all optional):
#   MODEL_NAME              model to serve (default NM-dev/NuExtract3.4_4B-RL-400)
#   VLLM_PORT               loopback port for vLLM (default 8001)
#   GRADIO_SERVER_PORT      public port for Gradio (default 7860)
#   OPENAI_API_KEY          API key shared by vLLM and the app (default EMPTY)
#   MAX_MODEL_LEN           passed to --max-model-len (default 8192)
#   GPU_MEMORY_UTILIZATION  passed to --gpu-memory-utilization (default 0.90)
#   VLLM_STARTUP_TIMEOUT    max seconds to wait for readiness; 0 = no limit
#                           (default 0)
#
# Requires bash >= 4.3 for `wait -n`.
set -euo pipefail

MODEL_NAME="${MODEL_NAME:-NM-dev/NuExtract3.4_4B-RL-400}"
VLLM_PORT="${VLLM_PORT:-8001}"
GRADIO_PORT="${GRADIO_SERVER_PORT:-7860}"
API_KEY="${OPENAI_API_KEY:-EMPTY}"
STARTUP_TIMEOUT="${VLLM_STARTUP_TIMEOUT:-0}"

if [[ ! "${STARTUP_TIMEOUT}" =~ ^[0-9]+$ ]]; then
  echo "VLLM_STARTUP_TIMEOUT must be a non-negative integer." >&2
  exit 2
fi
# Force base 10 so a value such as "08" is not parsed as invalid octal.
STARTUP_TIMEOUT=$((10#${STARTUP_TIMEOUT}))

VLLM_PID=""
GRADIO_PID=""

# Stop whichever child services are still running and reap them, so no
# GPU-holding process outlives this script.
cleanup() {
  local pid
  for pid in "${GRADIO_PID}" "${VLLM_PID}"; do
    if [[ -n "${pid}" ]] && kill -0 "${pid}" 2>/dev/null; then
      # The process may exit between the check and the kill; that is fine.
      kill "${pid}" 2>/dev/null || true
    fi
  done
  wait 2>/dev/null || true
}
trap cleanup EXIT
# Turn signals into a normal exit so the EXIT trap performs the teardown.
trap 'exit 143' TERM
trap 'exit 130' INT

echo "Starting vLLM with model: ${MODEL_NAME}"
vllm serve "${MODEL_NAME}" \
  --served-model-name "${MODEL_NAME}" \
  --host 127.0.0.1 \
  --port "${VLLM_PORT}" \
  --trust-remote-code \
  --dtype auto \
  --max-model-len "${MAX_MODEL_LEN:-8192}" \
  --gpu-memory-utilization "${GPU_MEMORY_UTILIZATION:-0.90}" \
  --api-key "${API_KEY}" &
VLLM_PID=$!

echo "Waiting for vLLM to become ready..."
SECONDS=0
# --max-time keeps a stalled connection from blocking the liveness check below.
until curl -sf --max-time 5 \
  -H "Authorization: Bearer ${API_KEY}" \
  "http://127.0.0.1:${VLLM_PORT}/v1/models" >/dev/null; do
  if ! kill -0 "${VLLM_PID}" 2>/dev/null; then
    echo "vLLM exited before becoming ready." >&2
    exit 1
  fi
  if ((STARTUP_TIMEOUT > 0 && SECONDS >= STARTUP_TIMEOUT)); then
    echo "vLLM was not ready after ${STARTUP_TIMEOUT}s; giving up." >&2
    exit 1
  fi
  sleep 2
done

echo "vLLM is ready. Starting Gradio..."
# Run Gradio in the background too: `wait -n` then returns as soon as EITHER
# service exits, and signal traps are not deferred behind a foreground child.
python3 /home/user/app/app.py \
  --model-name "${MODEL_NAME}" \
  --api-base "http://127.0.0.1:${VLLM_PORT}/v1" \
  --api-key "${API_KEY}" \
  --server-name "0.0.0.0" \
  --server-port "${GRADIO_PORT}" &
GRADIO_PID=$!

status=0
wait -n || status=$?
echo "A service exited (status ${status}); shutting down." >&2
exit "${status}"