MightyOctopus's picture
Update entrypoint.sh
02d258a verified
raw
history blame contribute delete
555 Bytes
#!/bin/bash
# Entrypoint: run a vLLM OpenAI-compatible API server (port 8000) alongside
# the Gradio UI at /app/app.py, and exit if either process dies so the
# container supervisor can restart us.
#
# Env:
#   MODEL_DIR - path to model weights (default: /app/models/model)
set -euo pipefail

# ---- runtime env fixes (stop libgomp and torchinductor errors) ----
export OMP_NUM_THREADS=1
export TORCHINDUCTOR_CACHE_DIR=/tmp/torchinductor_cache
mkdir -p "$TORCHINDUCTOR_CACHE_DIR"

# Start vLLM (OpenAI-compatible server) in the background.
python3 -m vllm.entrypoints.openai.api_server \
  --model "${MODEL_DIR:-/app/models/model}" \
  --served-model-name "Qwen/Qwen1.5-4B-Chat-AWQ" \
  --host 0.0.0.0 \
  --port 8000 \
  --enforce-eager \
  --max-model-len 2048 \
  --gpu-memory-utilization 0.8 &
vllm_pid=$!

# Start the Gradio app in the background too, so both children can be
# supervised (previously the app was exec'd and a crashed vLLM went
# unnoticed, leaving the UI serving against a dead backend).
python3 /app/app.py &
app_pid=$!

# Forward container stop signals (docker stop sends TERM) to both children.
trap 'kill "$vllm_pid" "$app_pid" 2>/dev/null || true' TERM INT

# Block until EITHER child exits; capture its status without tripping
# `set -e`. NB: a failing background job alone would never abort the
# script under -e, which is why this explicit wait is required.
rc=0
wait -n || rc=$?

# Take the surviving sibling down, reap it, and propagate the status.
kill "$vllm_pid" "$app_pid" 2>/dev/null || true
wait || true
exit "$rc"