Spaces:
Running on A100
Running on A100
Update start.sh
Browse files
start.sh
CHANGED
|
@@ -2,8 +2,9 @@
|
|
| 2 |
set -euo pipefail
|
| 3 |
|
| 4 |
MODEL_NAME="${MODEL_NAME:-NM-dev/NuExtract3.4_4B-RL-400}"
|
| 5 |
-
VLLM_PORT="${VLLM_PORT:-
|
| 6 |
GRADIO_PORT="${GRADIO_SERVER_PORT:-7860}"
|
|
|
|
| 7 |
|
| 8 |
echo "Starting vLLM with model: ${MODEL_NAME}"
|
| 9 |
|
|
@@ -13,18 +14,23 @@ vllm serve "${MODEL_NAME}" \
|
|
| 13 |
--port "${VLLM_PORT}" \
|
| 14 |
--trust-remote-code \
|
| 15 |
--dtype auto \
|
| 16 |
-
--max-model-len "${MAX_MODEL_LEN:-
|
| 17 |
--gpu-memory-utilization "${GPU_MEMORY_UTILIZATION:-0.90}" \
|
| 18 |
-
--
|
|
|
|
| 19 |
|
| 20 |
VLLM_PID=$!
|
| 21 |
|
| 22 |
echo "Waiting for vLLM to become ready..."
|
| 23 |
-
until curl -sf
|
|
|
|
|
|
|
|
|
|
| 24 |
if ! kill -0 "${VLLM_PID}" 2>/dev/null; then
|
| 25 |
echo "vLLM exited before becoming ready."
|
| 26 |
exit 1
|
| 27 |
fi
|
|
|
|
| 28 |
sleep 2
|
| 29 |
done
|
| 30 |
|
|
@@ -33,6 +39,6 @@ echo "vLLM is ready. Starting Gradio..."
|
|
| 33 |
python /home/user/app/app.py \
|
| 34 |
--model-name "${MODEL_NAME}" \
|
| 35 |
--api-base "http://127.0.0.1:${VLLM_PORT}/v1" \
|
| 36 |
-
--api-key "${
|
| 37 |
--server-name "0.0.0.0" \
|
| 38 |
--server-port "${GRADIO_PORT}"
|
|
|
|
| 2 |
set -euo pipefail
|
| 3 |
|
| 4 |
MODEL_NAME="${MODEL_NAME:-NM-dev/NuExtract3.4_4B-RL-400}"
|
| 5 |
+
VLLM_PORT="${VLLM_PORT:-8001}"
|
| 6 |
GRADIO_PORT="${GRADIO_SERVER_PORT:-7860}"
|
| 7 |
+
API_KEY="${OPENAI_API_KEY:-EMPTY}"
|
| 8 |
|
| 9 |
echo "Starting vLLM with model: ${MODEL_NAME}"
|
| 10 |
|
|
|
|
| 14 |
--port "${VLLM_PORT}" \
|
| 15 |
--trust-remote-code \
|
| 16 |
--dtype auto \
|
| 17 |
+
--max-model-len "${MAX_MODEL_LEN:-8192}" \
|
| 18 |
--gpu-memory-utilization "${GPU_MEMORY_UTILIZATION:-0.90}" \
|
| 19 |
+
--limit-mm-per-prompt image=1 \
|
| 20 |
+
--api-key "${API_KEY}" &
|
| 21 |
|
| 22 |
VLLM_PID=$!
|
| 23 |
|
| 24 |
echo "Waiting for vLLM to become ready..."
|
| 25 |
+
until curl -sf \
|
| 26 |
+
-H "Authorization: Bearer ${API_KEY}" \
|
| 27 |
+
"http://127.0.0.1:${VLLM_PORT}/v1/models" >/dev/null; do
|
| 28 |
+
|
| 29 |
if ! kill -0 "${VLLM_PID}" 2>/dev/null; then
|
| 30 |
echo "vLLM exited before becoming ready."
|
| 31 |
exit 1
|
| 32 |
fi
|
| 33 |
+
|
| 34 |
sleep 2
|
| 35 |
done
|
| 36 |
|
|
|
|
| 39 |
python /home/user/app/app.py \
|
| 40 |
--model-name "${MODEL_NAME}" \
|
| 41 |
--api-base "http://127.0.0.1:${VLLM_PORT}/v1" \
|
| 42 |
+
--api-key "${API_KEY}" \
|
| 43 |
--server-name "0.0.0.0" \
|
| 44 |
--server-port "${GRADIO_PORT}"
|