JSCPPProgrammer committed on
Commit
9fc2e74
·
verified ·
1 Parent(s): e5d10c8

Fix vLLM/PyTorch getpwuid crash: USER, TORCHINDUCTOR_CACHE_DIR on HF Spaces

Browse files
Files changed (1) hide show
  1. scripts/entrypoint.sh +33 -79
scripts/entrypoint.sh CHANGED
@@ -1,88 +1,42 @@
1
- #!/usr/bin/env bash
2
- # Line endings must be LF (Unix); see .gitattributes and Dockerfile sed.
3
- set -euo pipefail
4
- cd /app
5
 
6
- export PYTHONPATH="/app/vendor/rllm:${PYTHONPATH:-}"
7
 
8
- # Optional: load Space secrets copied to this path
9
- if [[ -f /app/.env.gen_image ]]; then
10
- set -a
11
- # shellcheck source=/dev/null
12
- source /app/.env.gen_image
13
- set +a
14
- fi
15
 
16
- if [[ "${START_VLLM_GENSEARCHER:-0}" != "1" ]]; then
17
- case "${OPENAI_BASE_URL:-}" in
18
- *127.0.0.1*|*localhost*)
19
- echo "[entrypoint] WARNING: OPENAI_BASE_URL points to loopback but START_VLLM_GENSEARCHER is not 1."
20
- echo "[entrypoint] The GenSearcher agent will get 'Connection error' unless a server listens here,"
21
- echo "[entrypoint] or you set OPENAI_BASE_URL to an external OpenAI-compatible URL (ending in /v1)."
22
- ;;
23
- esac
24
- fi
25
 
26
- wait_http() {
27
- local url=$1
28
- local name=$2
29
- local max_attempts=${3:-90}
30
- local i=0
31
- echo "[entrypoint] Waiting for ${name} (${url})..."
32
- until curl -sf "$url" >/dev/null 2>&1; do
33
- i=$((i + 1))
34
- if [[ $i -ge $max_attempts ]]; then
35
- echo "[entrypoint] Timeout waiting for ${name}"
36
- exit 1
37
- fi
38
- sleep 2
39
- done
40
- echo "[entrypoint] ${name} is up."
41
- }
42
 
43
- # Defaults: only FireRed + Gradio in-container. Point OPENAI_BASE_URL / BROWSE_SUMMARY_BASE_URL
44
- # to your vLLM (or other OpenAI-compatible) endpoints via Space secrets.
 
 
 
 
 
 
45
 
46
- # --- Optional local vLLM: GenSearcher-8B (OpenAI-compatible) ---
47
- if [[ "${START_VLLM_GENSEARCHER:-0}" == "1" ]]; then
48
- CUDA_VISIBLE_DEVICES="${GENSEARCHER_CUDA_VISIBLE_DEVICES:-0}" \
49
- vllm serve "${GENSEARCHER_MODEL_ID:-GenSearcher/Gen-Searcher-8B}" \
50
- --host 0.0.0.0 \
51
- --port 8002 \
52
- --tensor-parallel-size "${GENSEARCHER_TP:-1}" \
53
- --gpu-memory-utilization "${VLLM_GPU_MEMORY_UTIL:-0.85}" \
54
- --served-model-name "${GEN_EVAL_MODEL:-Gen-Searcher-8B}" \
55
- --max-model-len "${GENSEARCHER_MAX_MODEL_LEN:-65536}" \
56
- --no-enable-prefix-caching &
57
- wait_http "http://127.0.0.1:8002/v1/models" "GenSearcher vLLM"
58
- export OPENAI_BASE_URL="${OPENAI_BASE_URL:-http://127.0.0.1:8002/v1}"
59
- fi
60
 
61
- # --- Optional local vLLM: browse summarization (Qwen3-VL) ---
62
- if [[ "${START_VLLM_BROWSE:-0}" == "1" ]]; then
63
- export BROWSE_GENERATE_ENGINE=vllm
64
- CUDA_VISIBLE_DEVICES="${BROWSE_CUDA_VISIBLE_DEVICES:-1}" \
65
- vllm serve "${BROWSE_MODEL_ID:-Qwen/Qwen3-VL-30B-A3B-Instruct}" \
66
- --host 0.0.0.0 \
67
- --port 8003 \
68
- --tensor-parallel-size "${BROWSE_TP:-1}" \
69
- --gpu-memory-utilization "${VLLM_GPU_MEMORY_UTIL:-0.85}" \
70
- --served-model-name "${BROWSE_SUMMARY_MODEL:-Qwen3-VL-30B-A3B-Instruct}" \
71
- --max-model-len "${BROWSE_MAX_MODEL_LEN:-65536}" \
72
- --mm-processor-cache-gb 0 \
73
- --no-enable-prefix-caching &
74
- wait_http "http://127.0.0.1:8003/v1/models" "Browse-summary vLLM"
75
- export BROWSE_SUMMARY_BASE_URL="${BROWSE_SUMMARY_BASE_URL:-http://127.0.0.1:8003/v1}"
76
- fi
77
 
78
- # --- FireRed adapter (GenSearcher /generate contract) ---
79
- if [[ "${START_FIRERED_API:-1}" == "1" ]]; then
80
- CUDA_VISIBLE_DEVICES="${FIRERED_CUDA_VISIBLE_DEVICES:-0}" \
81
- python -m uvicorn services.firered_generate:app --host 0.0.0.0 --port 8765 &
82
- wait_http "http://127.0.0.1:8765/health" "FireRed API" 120
83
- export QWEN_EDIT_APP_URL="${QWEN_EDIT_APP_URL:-http://127.0.0.1:8765}"
84
- else
85
- echo "[entrypoint] START_FIRERED_API=0 — use external QWEN_EDIT_APP_URL for generation."
86
- fi
87
 
88
- exec python app.py
 
 
 
1
+ # Hugging Face Space (Docker) — GenSearcher + FireRed
2
+ # Requires GPU. For multi-GPU full-local mode, set START_VLLM_*=1 and CUDA device envs in README.
 
 
3
 
4
+ FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime
5
 
6
+ ENV DEBIAN_FRONTEND=noninteractive
7
+ RUN apt-get update && apt-get install -y --no-install-recommends \
8
+ curl \
9
+ git \
10
+ && rm -rf /var/lib/apt/lists/*
 
 
11
 
12
+ WORKDIR /app
 
 
 
 
 
 
 
 
13
 
14
+ COPY vendor/rllm /app/vendor/rllm
15
+ COPY requirements.txt /app/requirements.txt
16
+ COPY app.py space_gen.py space_health.py /app/
17
+ COPY services /app/services
18
+ COPY scripts /app/scripts
 
 
 
 
 
 
 
 
 
 
 
19
 
20
+ ENV PYTHONPATH=/app/vendor/rllm
21
+ ENV GRADIO_SERVER_PORT=7860
22
+ # HF Spaces / minimal images often have uid 1000 with no /etc/passwd entry; PyTorch Inductor calls
23
+ # getpass.getuser() and crashes with KeyError. USER/LOGNAME short-circuit getuser(); cache dirs avoid $HOME issues.
24
+ ENV USER=huggingface
25
+ ENV LOGNAME=huggingface
26
+ ENV TORCHINDUCTOR_CACHE_DIR=/tmp/torch_inductor_cache
27
+ ENV TRITON_CACHE_DIR=/tmp/triton_cache
28
 
29
+ RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
30
+ && pip install --no-cache-dir -e /app/vendor/rllm \
31
+ && pip install --no-cache-dir -r /app/requirements.txt
 
 
 
 
 
 
 
 
 
 
 
32
 
33
+ # Optional: local vLLM inside the image (large). Disable with build-arg if you only use external APIs.
34
+ ARG INSTALL_VLLM=1
35
+ RUN if [ "$INSTALL_VLLM" = "1" ]; then pip install --no-cache-dir "vllm>=0.6.3"; fi
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ # Strip Windows CRLF if present (avoids: /usr/bin/env: 'bash\r': No such file or directory)
38
+ RUN sed -i 's/\r$//' /app/scripts/entrypoint.sh && chmod +x /app/scripts/entrypoint.sh
 
 
 
 
 
 
 
39
 
40
+ EXPOSE 7860
41
+
42
+ CMD ["/app/scripts/entrypoint.sh"]