# Revision c994fd2 (verified): Fix HF exec entrypoint: generate entrypoint.sh in Docker; body in entrypoint_body.sh
# Main startup logic (run as: bash /app/scripts/entrypoint_body.sh).
# entrypoint.sh is generated in the Dockerfile so HF can exec it without CRLF/BOM issues.
# Strict mode: abort on any failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Work from the application root; later commands (e.g. `python app.py`) are
# resolved relative to it.
cd /app
# Same-container vLLM: PyTorch may call getpass.getuser() before USER is set in
# some runtimes, so make sure USER and LOGNAME always carry a value.
export USER="${USER:-huggingface}"
export LOGNAME="${LOGNAME:-$USER}"

# Compiler caches go under /tmp unless the caller already picked a location.
export TORCHINDUCTOR_CACHE_DIR="${TORCHINDUCTOR_CACHE_DIR:-/tmp/torch_inductor_cache}"
export TRITON_CACHE_DIR="${TRITON_CACHE_DIR:-/tmp/triton_cache}"

# Put /app/vendor/rllm first on the import path. The ':' separator is added only
# when PYTHONPATH already has content: a trailing ':' would be an empty entry,
# which Python treats as the current working directory.
export PYTHONPATH="/app/vendor/rllm${PYTHONPATH:+:${PYTHONPATH}}"
# Optional: Space secrets may have been copied to /app/.env.gen_image.
# When that file exists, source it with allexport switched on so that every
# variable it assigns is exported to the processes started below.
if [[ -f /app/.env.gen_image ]]; then
  set -o allexport
  # shellcheck source=/dev/null
  . /app/.env.gen_image
  set +o allexport
fi
#######################################
# Print a startup hint when the GenSearcher endpoint looks unreachable.
# Globals:
#   START_VLLM_GENSEARCHER (read), OPENAI_BASE_URL (read)
# Outputs:
#   Hint lines on stdout when local vLLM is not requested and OPENAI_BASE_URL
#   is either unset/empty or points at 127.0.0.1 / localhost; nothing otherwise.
# Returns:
#   0 always.
#######################################
warn_openai_base_url() {
  # With START_VLLM_GENSEARCHER=1 there is nothing to warn about.
  if [[ "${START_VLLM_GENSEARCHER:-0}" == "1" ]]; then
    return 0
  fi

  if [[ -z "${OPENAI_BASE_URL:-}" ]]; then
    printf '%s\n' \
      "[entrypoint] OPENAI_BASE_URL is unset. For GenSearcher **inside this Space only**, set Space variable" \
      "[entrypoint] START_VLLM_GENSEARCHER=1 (entrypoint will start vLLM here and set OPENAI_BASE_URL to loopback)."
  elif [[ "${OPENAI_BASE_URL}" == *127.0.0.1* || "${OPENAI_BASE_URL}" == *localhost* ]]; then
    printf '%s\n' \
      "[entrypoint] WARNING: OPENAI_BASE_URL points to loopback but START_VLLM_GENSEARCHER is not 1." \
      "[entrypoint] The GenSearcher agent will get 'Connection error' unless a server listens here," \
      "[entrypoint] or you set OPENAI_BASE_URL to an external OpenAI-compatible URL (ending in /v1)."
  fi
  return 0
}

warn_openai_base_url
#######################################
# Poll an HTTP endpoint until it answers successfully, or abort the script.
# Arguments:
#   $1 - URL to probe
#   $2 - human-readable service name used in the log lines
#   $3 - maximum number of failed probes before giving up (optional,
#        default 90); a 2 second pause separates consecutive probes
# Outputs:
#   Progress lines on stdout; the timeout diagnostic on stderr.
# Returns:
#   0 once the URL answers. On timeout it calls `exit 1`, which terminates
#   the calling script (callers rely on this to stop the container start-up).
#######################################
wait_http() {
  local url=$1
  local name=$2
  local max_attempts=${3:-90}
  local attempt=0

  echo "[entrypoint] Waiting for ${name} (${url})..."
  # -f makes HTTP error statuses count as failures. The two timeouts bound each
  # probe so that a server which accepts the connection but never replies
  # cannot stall the loop beyond max_attempts.
  until curl -sf --connect-timeout 2 --max-time 5 "$url" >/dev/null 2>&1; do
    attempt=$((attempt + 1))
    if [[ $attempt -ge $max_attempts ]]; then
      echo "[entrypoint] Timeout waiting for ${name}" >&2
      exit 1
    fi
    sleep 2
  done
  echo "[entrypoint] ${name} is up."
}
# Defaults: only FireRed + Gradio in-container. Point OPENAI_BASE_URL / BROWSE_SUMMARY_BASE_URL
# to your vLLM (or other OpenAI-compatible) endpoints via Space secrets.

# --- Optional local vLLM: GenSearcher-8B (OpenAI-compatible) ---
#######################################
# Start `vllm serve` for the GenSearcher model in the background, block until
# its /v1/models endpoint answers, then publish the endpoint.
# Globals (read, all optional):
#   GENSEARCHER_CUDA_VISIBLE_DEVICES  GPU list for the server (default 0)
#   GENSEARCHER_MODEL_ID              model to serve
#   GENSEARCHER_TP                    tensor-parallel size (default 1)
#   VLLM_GPU_MEMORY_UTIL              GPU memory fraction (default 0.85)
#   GEN_EVAL_MODEL                    served model name
#   GENSEARCHER_MAX_MODEL_LEN         max context length (default 65536)
#   GENSEARCHER_WAIT_ATTEMPTS         readiness probes passed to wait_http
#                                     (default 90); raise it for slow cold starts
# Globals (written):
#   OPENAI_BASE_URL  exported; an existing non-empty value is kept, otherwise
#                    it is set to the local server's /v1 URL
#######################################
start_gensearcher_vllm() {
  local -r port=8002

  CUDA_VISIBLE_DEVICES="${GENSEARCHER_CUDA_VISIBLE_DEVICES:-0}" \
    vllm serve "${GENSEARCHER_MODEL_ID:-GenSearcher/Gen-Searcher-8B}" \
    --host 0.0.0.0 \
    --port "${port}" \
    --tensor-parallel-size "${GENSEARCHER_TP:-1}" \
    --gpu-memory-utilization "${VLLM_GPU_MEMORY_UTIL:-0.85}" \
    --served-model-name "${GEN_EVAL_MODEL:-Gen-Searcher-8B}" \
    --max-model-len "${GENSEARCHER_MAX_MODEL_LEN:-65536}" \
    --no-enable-prefix-caching &

  wait_http "http://127.0.0.1:${port}/v1/models" "GenSearcher vLLM" \
    "${GENSEARCHER_WAIT_ATTEMPTS:-90}"
  export OPENAI_BASE_URL="${OPENAI_BASE_URL:-http://127.0.0.1:${port}/v1}"
}

if [[ "${START_VLLM_GENSEARCHER:-0}" == "1" ]]; then
  start_gensearcher_vllm
fi
# --- Optional local vLLM: browse summarization (Qwen3-VL) ---
#######################################
# Start `vllm serve` for the browse-summary model in the background, block
# until its /v1/models endpoint answers, then publish the endpoint.
# Globals (read, all optional):
#   BROWSE_CUDA_VISIBLE_DEVICES  GPU list for the server (default 1)
#   BROWSE_MODEL_ID              model to serve
#   BROWSE_TP                    tensor-parallel size (default 1)
#   VLLM_GPU_MEMORY_UTIL         GPU memory fraction (default 0.85)
#   BROWSE_SUMMARY_MODEL         served model name
#   BROWSE_MAX_MODEL_LEN         max context length (default 65536)
#   BROWSE_WAIT_ATTEMPTS         readiness probes passed to wait_http
#                                (default 90); raise it for slow cold starts
# Globals (written):
#   BROWSE_GENERATE_ENGINE   exported as "vllm" before the server is launched
#   BROWSE_SUMMARY_BASE_URL  exported; an existing non-empty value is kept,
#                            otherwise set to the local server's /v1 URL
#######################################
start_browse_vllm() {
  local -r port=8003

  export BROWSE_GENERATE_ENGINE=vllm
  CUDA_VISIBLE_DEVICES="${BROWSE_CUDA_VISIBLE_DEVICES:-1}" \
    vllm serve "${BROWSE_MODEL_ID:-Qwen/Qwen3-VL-30B-A3B-Instruct}" \
    --host 0.0.0.0 \
    --port "${port}" \
    --tensor-parallel-size "${BROWSE_TP:-1}" \
    --gpu-memory-utilization "${VLLM_GPU_MEMORY_UTIL:-0.85}" \
    --served-model-name "${BROWSE_SUMMARY_MODEL:-Qwen3-VL-30B-A3B-Instruct}" \
    --max-model-len "${BROWSE_MAX_MODEL_LEN:-65536}" \
    --mm-processor-cache-gb 0 \
    --no-enable-prefix-caching &

  wait_http "http://127.0.0.1:${port}/v1/models" "Browse-summary vLLM" \
    "${BROWSE_WAIT_ATTEMPTS:-90}"
  export BROWSE_SUMMARY_BASE_URL="${BROWSE_SUMMARY_BASE_URL:-http://127.0.0.1:${port}/v1}"
}

if [[ "${START_VLLM_BROWSE:-0}" == "1" ]]; then
  start_browse_vllm
fi
# --- FireRed adapter (GenSearcher /generate contract) ---
# Enabled unless START_FIRERED_API is set to something other than 1. The
# uvicorn server runs in the background on port 8765; start-up blocks until its
# /health endpoint answers.
if [[ "${START_FIRERED_API:-1}" == "1" ]]; then
  CUDA_VISIBLE_DEVICES="${FIRERED_CUDA_VISIBLE_DEVICES:-0}" \
    python -m uvicorn services.firered_generate:app --host 0.0.0.0 --port 8765 &
  # Probe budget defaults to 120 attempts; set FIRERED_WAIT_ATTEMPTS to raise
  # it when the adapter needs longer to come up.
  wait_http "http://127.0.0.1:8765/health" "FireRed API" "${FIRERED_WAIT_ATTEMPTS:-120}"
  # Keep a caller-provided QWEN_EDIT_APP_URL; otherwise point at the local adapter.
  export QWEN_EDIT_APP_URL="${QWEN_EDIT_APP_URL:-http://127.0.0.1:8765}"
else
  echo "[entrypoint] START_FIRERED_API=0 — use external QWEN_EDIT_APP_URL for generation."
fi

# Hand the process over to the application: exec replaces this shell, so
# nothing after this line runs.
exec python app.py