JSCPPProgrammer committed on
Commit
c3cfe1c
·
verified ·
1 Parent(s): 80b7188

fix: Unix LF for entrypoint.sh + Dockerfile sed CRLF guard

Browse files
Files changed (3) hide show
  1. .gitattributes +2 -35
  2. Dockerfile +36 -35
  3. scripts/entrypoint.sh +77 -77
.gitattributes CHANGED
@@ -1,37 +1,4 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  vendor/rllm/docs/assets/rllm_components.png filter=lfs diff=lfs merge=lfs -text
37
  vendor/rllm/docs/assets/sdk_arch.png filter=lfs diff=lfs merge=lfs -text
 
1
+ # Linux containers require LF in shell scripts (CRLF causes: env: 'bash\r': No such file)
2
+ *.sh text eol=lf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  vendor/rllm/docs/assets/rllm_components.png filter=lfs diff=lfs merge=lfs -text
4
  vendor/rllm/docs/assets/sdk_arch.png filter=lfs diff=lfs merge=lfs -text
Dockerfile CHANGED
@@ -1,35 +1,36 @@
1
- # Hugging Face Space (Docker) — GenSearcher + FireRed
2
- # Requires GPU. For multi-GPU full-local mode, set START_VLLM_*=1 and CUDA device envs in README.
3
-
4
- FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime
5
-
6
- ENV DEBIAN_FRONTEND=noninteractive
7
- RUN apt-get update && apt-get install -y --no-install-recommends \
8
- curl \
9
- git \
10
- && rm -rf /var/lib/apt/lists/*
11
-
12
- WORKDIR /app
13
-
14
- COPY vendor/rllm /app/vendor/rllm
15
- COPY requirements.txt /app/requirements.txt
16
- COPY app.py space_gen.py /app/
17
- COPY services /app/services
18
- COPY scripts /app/scripts
19
-
20
- ENV PYTHONPATH=/app/vendor/rllm
21
- ENV GRADIO_SERVER_PORT=7860
22
-
23
- RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
24
- && pip install --no-cache-dir -e /app/vendor/rllm \
25
- && pip install --no-cache-dir -r /app/requirements.txt
26
-
27
- # Optional: local vLLM inside the image (large). Disable with build-arg if you only use external APIs.
28
- ARG INSTALL_VLLM=1
29
- RUN if [ "$INSTALL_VLLM" = "1" ]; then pip install --no-cache-dir "vllm>=0.6.3"; fi
30
-
31
- RUN chmod +x /app/scripts/entrypoint.sh
32
-
33
- EXPOSE 7860
34
-
35
- CMD ["/app/scripts/entrypoint.sh"]
 
 
1
+ # Hugging Face Space (Docker) — GenSearcher + FireRed
2
+ # Requires GPU. For multi-GPU full-local mode, set START_VLLM_*=1 and CUDA device envs in README.
3
+
4
+ FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime
5
+
6
+ ENV DEBIAN_FRONTEND=noninteractive
7
+ RUN apt-get update && apt-get install -y --no-install-recommends \
8
+ curl \
9
+ git \
10
+ && rm -rf /var/lib/apt/lists/*
11
+
12
+ WORKDIR /app
13
+
14
+ COPY vendor/rllm /app/vendor/rllm
15
+ COPY requirements.txt /app/requirements.txt
16
+ COPY app.py space_gen.py /app/
17
+ COPY services /app/services
18
+ COPY scripts /app/scripts
19
+
20
+ ENV PYTHONPATH=/app/vendor/rllm
21
+ ENV GRADIO_SERVER_PORT=7860
22
+
23
+ RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
24
+ && pip install --no-cache-dir -e /app/vendor/rllm \
25
+ && pip install --no-cache-dir -r /app/requirements.txt
26
+
27
+ # Optional: local vLLM inside the image (large). Disable with build-arg if you only use external APIs.
28
+ ARG INSTALL_VLLM=1
29
+ RUN if [ "$INSTALL_VLLM" = "1" ]; then pip install --no-cache-dir "vllm>=0.6.3"; fi
30
+
31
+ # Strip Windows CRLF if present (avoids: /usr/bin/env: 'bash\r': No such file or directory)
32
+ RUN sed -i 's/\r$//' /app/scripts/entrypoint.sh && chmod +x /app/scripts/entrypoint.sh
33
+
34
+ EXPOSE 7860
35
+
36
+ CMD ["/app/scripts/entrypoint.sh"]
scripts/entrypoint.sh CHANGED
@@ -1,77 +1,77 @@
1
- #!/usr/bin/env bash
2
- set -euo pipefail
3
- cd /app
4
-
5
- export PYTHONPATH="/app/vendor/rllm:${PYTHONPATH:-}"
6
-
7
- # Optional: load Space secrets copied to this path
8
- if [[ -f /app/.env.gen_image ]]; then
9
- set -a
10
- # shellcheck source=/dev/null
11
- source /app/.env.gen_image
12
- set +a
13
- fi
14
-
15
- wait_http() {
16
- local url=$1
17
- local name=$2
18
- local max_attempts=${3:-90}
19
- local i=0
20
- echo "[entrypoint] Waiting for ${name} (${url})..."
21
- until curl -sf "$url" >/dev/null 2>&1; do
22
- i=$((i + 1))
23
- if [[ $i -ge $max_attempts ]]; then
24
- echo "[entrypoint] Timeout waiting for ${name}"
25
- exit 1
26
- fi
27
- sleep 2
28
- done
29
- echo "[entrypoint] ${name} is up."
30
- }
31
-
32
- # Defaults: only FireRed + Gradio in-container. Point OPENAI_BASE_URL / BROWSE_SUMMARY_BASE_URL
33
- # to your vLLM (or other OpenAI-compatible) endpoints via Space secrets.
34
-
35
- # --- Optional local vLLM: GenSearcher-8B (OpenAI-compatible) ---
36
- if [[ "${START_VLLM_GENSEARCHER:-0}" == "1" ]]; then
37
- CUDA_VISIBLE_DEVICES="${GENSEARCHER_CUDA_VISIBLE_DEVICES:-0}" \
38
- vllm serve "${GENSEARCHER_MODEL_ID:-GenSearcher/Gen-Searcher-8B}" \
39
- --host 0.0.0.0 \
40
- --port 8002 \
41
- --tensor-parallel-size "${GENSEARCHER_TP:-1}" \
42
- --gpu-memory-utilization "${VLLM_GPU_MEMORY_UTIL:-0.85}" \
43
- --served-model-name "${GEN_EVAL_MODEL:-Gen-Searcher-8B}" \
44
- --max-model-len "${GENSEARCHER_MAX_MODEL_LEN:-65536}" \
45
- --no-enable-prefix-caching &
46
- wait_http "http://127.0.0.1:8002/v1/models" "GenSearcher vLLM"
47
- export OPENAI_BASE_URL="${OPENAI_BASE_URL:-http://127.0.0.1:8002/v1}"
48
- fi
49
-
50
- # --- Optional local vLLM: browse summarization (Qwen3-VL) ---
51
- if [[ "${START_VLLM_BROWSE:-0}" == "1" ]]; then
52
- export BROWSE_GENERATE_ENGINE=vllm
53
- CUDA_VISIBLE_DEVICES="${BROWSE_CUDA_VISIBLE_DEVICES:-1}" \
54
- vllm serve "${BROWSE_MODEL_ID:-Qwen/Qwen3-VL-30B-A3B-Instruct}" \
55
- --host 0.0.0.0 \
56
- --port 8003 \
57
- --tensor-parallel-size "${BROWSE_TP:-1}" \
58
- --gpu-memory-utilization "${VLLM_GPU_MEMORY_UTIL:-0.85}" \
59
- --served-model-name "${BROWSE_SUMMARY_MODEL:-Qwen3-VL-30B-A3B-Instruct}" \
60
- --max-model-len "${BROWSE_MAX_MODEL_LEN:-65536}" \
61
- --mm-processor-cache-gb 0 \
62
- --no-enable-prefix-caching &
63
- wait_http "http://127.0.0.1:8003/v1/models" "Browse-summary vLLM"
64
- export BROWSE_SUMMARY_BASE_URL="${BROWSE_SUMMARY_BASE_URL:-http://127.0.0.1:8003/v1}"
65
- fi
66
-
67
- # --- FireRed adapter (GenSearcher /generate contract) ---
68
- if [[ "${START_FIRERED_API:-1}" == "1" ]]; then
69
- CUDA_VISIBLE_DEVICES="${FIRERED_CUDA_VISIBLE_DEVICES:-0}" \
70
- python -m uvicorn services.firered_generate:app --host 0.0.0.0 --port 8765 &
71
- wait_http "http://127.0.0.1:8765/health" "FireRed API" 120
72
- export QWEN_EDIT_APP_URL="${QWEN_EDIT_APP_URL:-http://127.0.0.1:8765}"
73
- else
74
- echo "[entrypoint] START_FIRERED_API=0 — use external QWEN_EDIT_APP_URL for generation."
75
- fi
76
-
77
- exec python app.py
 
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ cd /app
4
+
5
+ export PYTHONPATH="/app/vendor/rllm:${PYTHONPATH:-}"
6
+
7
+ # Optional: load Space secrets copied to this path
8
+ if [[ -f /app/.env.gen_image ]]; then
9
+ set -a
10
+ # shellcheck source=/dev/null
11
+ source /app/.env.gen_image
12
+ set +a
13
+ fi
14
+
15
+ wait_http() {
16
+ local url=$1
17
+ local name=$2
18
+ local max_attempts=${3:-90}
19
+ local i=0
20
+ echo "[entrypoint] Waiting for ${name} (${url})..."
21
+ until curl -sf "$url" >/dev/null 2>&1; do
22
+ i=$((i + 1))
23
+ if [[ $i -ge $max_attempts ]]; then
24
+ echo "[entrypoint] Timeout waiting for ${name}"
25
+ exit 1
26
+ fi
27
+ sleep 2
28
+ done
29
+ echo "[entrypoint] ${name} is up."
30
+ }
31
+
32
+ # Defaults: only FireRed + Gradio in-container. Point OPENAI_BASE_URL / BROWSE_SUMMARY_BASE_URL
33
+ # to your vLLM (or other OpenAI-compatible) endpoints via Space secrets.
34
+
35
+ # --- Optional local vLLM: GenSearcher-8B (OpenAI-compatible) ---
36
+ if [[ "${START_VLLM_GENSEARCHER:-0}" == "1" ]]; then
37
+ CUDA_VISIBLE_DEVICES="${GENSEARCHER_CUDA_VISIBLE_DEVICES:-0}" \
38
+ vllm serve "${GENSEARCHER_MODEL_ID:-GenSearcher/Gen-Searcher-8B}" \
39
+ --host 0.0.0.0 \
40
+ --port 8002 \
41
+ --tensor-parallel-size "${GENSEARCHER_TP:-1}" \
42
+ --gpu-memory-utilization "${VLLM_GPU_MEMORY_UTIL:-0.85}" \
43
+ --served-model-name "${GEN_EVAL_MODEL:-Gen-Searcher-8B}" \
44
+ --max-model-len "${GENSEARCHER_MAX_MODEL_LEN:-65536}" \
45
+ --no-enable-prefix-caching &
46
+ wait_http "http://127.0.0.1:8002/v1/models" "GenSearcher vLLM"
47
+ export OPENAI_BASE_URL="${OPENAI_BASE_URL:-http://127.0.0.1:8002/v1}"
48
+ fi
49
+
50
+ # --- Optional local vLLM: browse summarization (Qwen3-VL) ---
51
+ if [[ "${START_VLLM_BROWSE:-0}" == "1" ]]; then
52
+ export BROWSE_GENERATE_ENGINE=vllm
53
+ CUDA_VISIBLE_DEVICES="${BROWSE_CUDA_VISIBLE_DEVICES:-1}" \
54
+ vllm serve "${BROWSE_MODEL_ID:-Qwen/Qwen3-VL-30B-A3B-Instruct}" \
55
+ --host 0.0.0.0 \
56
+ --port 8003 \
57
+ --tensor-parallel-size "${BROWSE_TP:-1}" \
58
+ --gpu-memory-utilization "${VLLM_GPU_MEMORY_UTIL:-0.85}" \
59
+ --served-model-name "${BROWSE_SUMMARY_MODEL:-Qwen3-VL-30B-A3B-Instruct}" \
60
+ --max-model-len "${BROWSE_MAX_MODEL_LEN:-65536}" \
61
+ --mm-processor-cache-gb 0 \
62
+ --no-enable-prefix-caching &
63
+ wait_http "http://127.0.0.1:8003/v1/models" "Browse-summary vLLM"
64
+ export BROWSE_SUMMARY_BASE_URL="${BROWSE_SUMMARY_BASE_URL:-http://127.0.0.1:8003/v1}"
65
+ fi
66
+
67
+ # --- FireRed adapter (GenSearcher /generate contract) ---
68
+ if [[ "${START_FIRERED_API:-1}" == "1" ]]; then
69
+ CUDA_VISIBLE_DEVICES="${FIRERED_CUDA_VISIBLE_DEVICES:-0}" \
70
+ python -m uvicorn services.firered_generate:app --host 0.0.0.0 --port 8765 &
71
+ wait_http "http://127.0.0.1:8765/health" "FireRed API" 120
72
+ export QWEN_EDIT_APP_URL="${QWEN_EDIT_APP_URL:-http://127.0.0.1:8765}"
73
+ else
74
+ echo "[entrypoint] START_FIRERED_API=0 — use external QWEN_EDIT_APP_URL for generation."
75
+ fi
76
+
77
+ exec python app.py