Spaces:

JSCPPProgrammer
/

gensearcher-firered

Paused

App Files Community

JSCPPProgrammer committed on Apr 5

Commit

9fc2e74

verified ·

1 Parent(s): e5d10c8

Fix vLLM/PyTorch getpwuid crash: USER, TORCHINDUCTOR_CACHE_DIR on HF Spaces

Browse files

Files changed (1) hide show

scripts/entrypoint.sh +33 -79

scripts/entrypoint.sh CHANGED Viewed

@@ -1,88 +1,42 @@
-#!/usr/bin/env bash
-# Line endings must be LF (Unix); see .gitattributes and Dockerfile sed.
-set -euo pipefail
-cd /app
-export PYTHONPATH="/app/vendor/rllm:${PYTHONPATH:-}"
-# Optional: load Space secrets copied to this path
-if [[ -f /app/.env.gen_image ]]; then
-  set -a
-  # shellcheck source=/dev/null
-  source /app/.env.gen_image
-  set +a
-fi
-if [[ "${START_VLLM_GENSEARCHER:-0}" != "1" ]]; then
-  case "${OPENAI_BASE_URL:-}" in
-    *127.0.0.1*|*localhost*)
-      echo "[entrypoint] WARNING: OPENAI_BASE_URL points to loopback but START_VLLM_GENSEARCHER is not 1."
-      echo "[entrypoint]          The GenSearcher agent will get 'Connection error' unless a server listens here,"
-      echo "[entrypoint]          or you set OPENAI_BASE_URL to an external OpenAI-compatible URL (ending in /v1)."
-      ;;
-  esac
-fi
-wait_http() {
-  local url=$1
-  local name=$2
-  local max_attempts=${3:-90}
-  local i=0
-  echo "[entrypoint] Waiting for ${name} (${url})..."
-  until curl -sf "$url" >/dev/null 2>&1; do
-    i=$((i + 1))
-    if [[ $i -ge $max_attempts ]]; then
-      echo "[entrypoint] Timeout waiting for ${name}"
-      exit 1
-    fi
-    sleep 2
-  done
-  echo "[entrypoint] ${name} is up."
-}
-# Defaults: only FireRed + Gradio in-container. Point OPENAI_BASE_URL / BROWSE_SUMMARY_BASE_URL
-# to your vLLM (or other OpenAI-compatible) endpoints via Space secrets.
-# --- Optional local vLLM: GenSearcher-8B (OpenAI-compatible) ---
-if [[ "${START_VLLM_GENSEARCHER:-0}" == "1" ]]; then
-  CUDA_VISIBLE_DEVICES="${GENSEARCHER_CUDA_VISIBLE_DEVICES:-0}" \
-    vllm serve "${GENSEARCHER_MODEL_ID:-GenSearcher/Gen-Searcher-8B}" \
-    --host 0.0.0.0 \
-    --port 8002 \
-    --tensor-parallel-size "${GENSEARCHER_TP:-1}" \
-    --gpu-memory-utilization "${VLLM_GPU_MEMORY_UTIL:-0.85}" \
-    --served-model-name "${GEN_EVAL_MODEL:-Gen-Searcher-8B}" \
-    --max-model-len "${GENSEARCHER_MAX_MODEL_LEN:-65536}" \
-    --no-enable-prefix-caching &
-  wait_http "http://127.0.0.1:8002/v1/models" "GenSearcher vLLM"
-  export OPENAI_BASE_URL="${OPENAI_BASE_URL:-http://127.0.0.1:8002/v1}"
-fi
-# --- Optional local vLLM: browse summarization (Qwen3-VL) ---
-if [[ "${START_VLLM_BROWSE:-0}" == "1" ]]; then
-  export BROWSE_GENERATE_ENGINE=vllm
-  CUDA_VISIBLE_DEVICES="${BROWSE_CUDA_VISIBLE_DEVICES:-1}" \
-    vllm serve "${BROWSE_MODEL_ID:-Qwen/Qwen3-VL-30B-A3B-Instruct}" \
-    --host 0.0.0.0 \
-    --port 8003 \
-    --tensor-parallel-size "${BROWSE_TP:-1}" \
-    --gpu-memory-utilization "${VLLM_GPU_MEMORY_UTIL:-0.85}" \
-    --served-model-name "${BROWSE_SUMMARY_MODEL:-Qwen3-VL-30B-A3B-Instruct}" \
-    --max-model-len "${BROWSE_MAX_MODEL_LEN:-65536}" \
-    --mm-processor-cache-gb 0 \
-    --no-enable-prefix-caching &
-  wait_http "http://127.0.0.1:8003/v1/models" "Browse-summary vLLM"
-  export BROWSE_SUMMARY_BASE_URL="${BROWSE_SUMMARY_BASE_URL:-http://127.0.0.1:8003/v1}"
-fi
-# --- FireRed adapter (GenSearcher /generate contract) ---
-if [[ "${START_FIRERED_API:-1}" == "1" ]]; then
-  CUDA_VISIBLE_DEVICES="${FIRERED_CUDA_VISIBLE_DEVICES:-0}" \
-    python -m uvicorn services.firered_generate:app --host 0.0.0.0 --port 8765 &
-  wait_http "http://127.0.0.1:8765/health" "FireRed API" 120
-  export QWEN_EDIT_APP_URL="${QWEN_EDIT_APP_URL:-http://127.0.0.1:8765}"
-else
-  echo "[entrypoint] START_FIRERED_API=0 — use external QWEN_EDIT_APP_URL for generation."
-fi
-exec python app.py

+# Hugging Face Space (Docker) — GenSearcher + FireRed
+# Requires GPU. For multi-GPU full-local mode, set START_VLLM_*=1 and CUDA device envs in README.
+FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime
+ENV DEBIAN_FRONTEND=noninteractive
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+COPY vendor/rllm /app/vendor/rllm
+COPY requirements.txt /app/requirements.txt
+COPY app.py space_gen.py space_health.py /app/
+COPY services /app/services
+COPY scripts /app/scripts
+ENV PYTHONPATH=/app/vendor/rllm
+ENV GRADIO_SERVER_PORT=7860
+# HF Spaces / minimal images often have uid 1000 with no /etc/passwd entry; PyTorch Inductor calls
+# getpass.getuser() and crashes with KeyError. USER/LOGNAME short-circuit getuser(); cache dirs avoid $HOME issues.
+ENV USER=huggingface
+ENV LOGNAME=huggingface
+ENV TORCHINDUCTOR_CACHE_DIR=/tmp/torch_inductor_cache
+ENV TRITON_CACHE_DIR=/tmp/triton_cache
+RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
+    && pip install --no-cache-dir -e /app/vendor/rllm \
+    && pip install --no-cache-dir -r /app/requirements.txt
+# Optional: local vLLM inside the image (large). Disable with build-arg if you only use external APIs.
+ARG INSTALL_VLLM=1
+RUN if [ "$INSTALL_VLLM" = "1" ]; then pip install --no-cache-dir "vllm>=0.6.3"; fi
+# Strip Windows CRLF if present (avoids: /usr/bin/env: 'bash\r': No such file or directory)
+RUN sed -i 's/\r$//' /app/scripts/entrypoint.sh && chmod +x /app/scripts/entrypoint.sh
+EXPOSE 7860
+CMD ["/app/scripts/entrypoint.sh"]