fix: Unix LF for entrypoint.sh + Dockerfile sed CRLF guard
Browse files
- .gitattributes +2 -35
- Dockerfile +36 -35
- scripts/entrypoint.sh +77 -77
.gitattributes
CHANGED
|
@@ -1,37 +1,4 @@
|
|
| 1 |
-
|
| 2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
vendor/rllm/docs/assets/rllm_components.png filter=lfs diff=lfs merge=lfs -text
|
| 37 |
vendor/rllm/docs/assets/sdk_arch.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 1 |
+
# Linux containers require LF in shell scripts (CRLF causes: env: 'bash\r': No such file)
|
| 2 |
+
*.sh text eol=lf
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
vendor/rllm/docs/assets/rllm_components.png filter=lfs diff=lfs merge=lfs -text
|
| 4 |
vendor/rllm/docs/assets/sdk_arch.png filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
CHANGED
|
@@ -1,35 +1,36 @@
|
|
| 1 |
-
# Hugging Face Space (Docker) — GenSearcher + FireRed
|
| 2 |
-
# Requires GPU. For multi-GPU full-local mode, set START_VLLM_*=1 and CUDA device envs in README.
|
| 3 |
-
|
| 4 |
-
FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime
|
| 5 |
-
|
| 6 |
-
ENV DEBIAN_FRONTEND=noninteractive
|
| 7 |
-
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 8 |
-
curl \
|
| 9 |
-
git \
|
| 10 |
-
&& rm -rf /var/lib/apt/lists/*
|
| 11 |
-
|
| 12 |
-
WORKDIR /app
|
| 13 |
-
|
| 14 |
-
COPY vendor/rllm /app/vendor/rllm
|
| 15 |
-
COPY requirements.txt /app/requirements.txt
|
| 16 |
-
COPY app.py space_gen.py /app/
|
| 17 |
-
COPY services /app/services
|
| 18 |
-
COPY scripts /app/scripts
|
| 19 |
-
|
| 20 |
-
ENV PYTHONPATH=/app/vendor/rllm
|
| 21 |
-
ENV GRADIO_SERVER_PORT=7860
|
| 22 |
-
|
| 23 |
-
RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
|
| 24 |
-
&& pip install --no-cache-dir -e /app/vendor/rllm \
|
| 25 |
-
&& pip install --no-cache-dir -r /app/requirements.txt
|
| 26 |
-
|
| 27 |
-
# Optional: local vLLM inside the image (large). Disable with build-arg if you only use external APIs.
|
| 28 |
-
ARG INSTALL_VLLM=1
|
| 29 |
-
RUN if [ "$INSTALL_VLLM" = "1" ]; then pip install --no-cache-dir "vllm>=0.6.3"; fi
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
| 1 |
+
# Hugging Face Space (Docker) — GenSearcher + FireRed
|
| 2 |
+
# Requires GPU. For multi-GPU full-local mode, set START_VLLM_*=1 and CUDA device envs in README.
|
| 3 |
+
|
| 4 |
+
FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime
|
| 5 |
+
|
| 6 |
+
ENV DEBIAN_FRONTEND=noninteractive
|
| 7 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 8 |
+
curl \
|
| 9 |
+
git \
|
| 10 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 11 |
+
|
| 12 |
+
WORKDIR /app
|
| 13 |
+
|
| 14 |
+
COPY vendor/rllm /app/vendor/rllm
|
| 15 |
+
COPY requirements.txt /app/requirements.txt
|
| 16 |
+
COPY app.py space_gen.py /app/
|
| 17 |
+
COPY services /app/services
|
| 18 |
+
COPY scripts /app/scripts
|
| 19 |
+
|
| 20 |
+
ENV PYTHONPATH=/app/vendor/rllm
|
| 21 |
+
ENV GRADIO_SERVER_PORT=7860
|
| 22 |
+
|
| 23 |
+
RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
|
| 24 |
+
&& pip install --no-cache-dir -e /app/vendor/rllm \
|
| 25 |
+
&& pip install --no-cache-dir -r /app/requirements.txt
|
| 26 |
+
|
| 27 |
+
# Optional: local vLLM inside the image (large). Disable with build-arg if you only use external APIs.
|
| 28 |
+
ARG INSTALL_VLLM=1
|
| 29 |
+
RUN if [ "$INSTALL_VLLM" = "1" ]; then pip install --no-cache-dir "vllm>=0.6.3"; fi
|
| 30 |
+
|
| 31 |
+
# Strip Windows CRLF if present (avoids: /usr/bin/env: 'bash\r': No such file or directory)
|
| 32 |
+
RUN sed -i 's/\r$//' /app/scripts/entrypoint.sh && chmod +x /app/scripts/entrypoint.sh
|
| 33 |
+
|
| 34 |
+
EXPOSE 7860
|
| 35 |
+
|
| 36 |
+
CMD ["/app/scripts/entrypoint.sh"]
|
scripts/entrypoint.sh
CHANGED
|
@@ -1,77 +1,77 @@
|
|
| 1 |
-
#!/usr/bin/env bash
|
| 2 |
-
set -euo pipefail
|
| 3 |
-
cd /app
|
| 4 |
-
|
| 5 |
-
export PYTHONPATH="/app/vendor/rllm:${PYTHONPATH:-}"
|
| 6 |
-
|
| 7 |
-
# Optional: load Space secrets copied to this path
|
| 8 |
-
if [[ -f /app/.env.gen_image ]]; then
|
| 9 |
-
set -a
|
| 10 |
-
# shellcheck source=/dev/null
|
| 11 |
-
source /app/.env.gen_image
|
| 12 |
-
set +a
|
| 13 |
-
fi
|
| 14 |
-
|
| 15 |
-
wait_http() {
|
| 16 |
-
local url=$1
|
| 17 |
-
local name=$2
|
| 18 |
-
local max_attempts=${3:-90}
|
| 19 |
-
local i=0
|
| 20 |
-
echo "[entrypoint] Waiting for ${name} (${url})..."
|
| 21 |
-
until curl -sf "$url" >/dev/null 2>&1; do
|
| 22 |
-
i=$((i + 1))
|
| 23 |
-
if [[ $i -ge $max_attempts ]]; then
|
| 24 |
-
echo "[entrypoint] Timeout waiting for ${name}"
|
| 25 |
-
exit 1
|
| 26 |
-
fi
|
| 27 |
-
sleep 2
|
| 28 |
-
done
|
| 29 |
-
echo "[entrypoint] ${name} is up."
|
| 30 |
-
}
|
| 31 |
-
|
| 32 |
-
# Defaults: only FireRed + Gradio in-container. Point OPENAI_BASE_URL / BROWSE_SUMMARY_BASE_URL
|
| 33 |
-
# to your vLLM (or other OpenAI-compatible) endpoints via Space secrets.
|
| 34 |
-
|
| 35 |
-
# --- Optional local vLLM: GenSearcher-8B (OpenAI-compatible) ---
|
| 36 |
-
if [[ "${START_VLLM_GENSEARCHER:-0}" == "1" ]]; then
|
| 37 |
-
CUDA_VISIBLE_DEVICES="${GENSEARCHER_CUDA_VISIBLE_DEVICES:-0}" \
|
| 38 |
-
vllm serve "${GENSEARCHER_MODEL_ID:-GenSearcher/Gen-Searcher-8B}" \
|
| 39 |
-
--host 0.0.0.0 \
|
| 40 |
-
--port 8002 \
|
| 41 |
-
--tensor-parallel-size "${GENSEARCHER_TP:-1}" \
|
| 42 |
-
--gpu-memory-utilization "${VLLM_GPU_MEMORY_UTIL:-0.85}" \
|
| 43 |
-
--served-model-name "${GEN_EVAL_MODEL:-Gen-Searcher-8B}" \
|
| 44 |
-
--max-model-len "${GENSEARCHER_MAX_MODEL_LEN:-65536}" \
|
| 45 |
-
--no-enable-prefix-caching &
|
| 46 |
-
wait_http "http://127.0.0.1:8002/v1/models" "GenSearcher vLLM"
|
| 47 |
-
export OPENAI_BASE_URL="${OPENAI_BASE_URL:-http://127.0.0.1:8002/v1}"
|
| 48 |
-
fi
|
| 49 |
-
|
| 50 |
-
# --- Optional local vLLM: browse summarization (Qwen3-VL) ---
|
| 51 |
-
if [[ "${START_VLLM_BROWSE:-0}" == "1" ]]; then
|
| 52 |
-
export BROWSE_GENERATE_ENGINE=vllm
|
| 53 |
-
CUDA_VISIBLE_DEVICES="${BROWSE_CUDA_VISIBLE_DEVICES:-1}" \
|
| 54 |
-
vllm serve "${BROWSE_MODEL_ID:-Qwen/Qwen3-VL-30B-A3B-Instruct}" \
|
| 55 |
-
--host 0.0.0.0 \
|
| 56 |
-
--port 8003 \
|
| 57 |
-
--tensor-parallel-size "${BROWSE_TP:-1}" \
|
| 58 |
-
--gpu-memory-utilization "${VLLM_GPU_MEMORY_UTIL:-0.85}" \
|
| 59 |
-
--served-model-name "${BROWSE_SUMMARY_MODEL:-Qwen3-VL-30B-A3B-Instruct}" \
|
| 60 |
-
--max-model-len "${BROWSE_MAX_MODEL_LEN:-65536}" \
|
| 61 |
-
--mm-processor-cache-gb 0 \
|
| 62 |
-
--no-enable-prefix-caching &
|
| 63 |
-
wait_http "http://127.0.0.1:8003/v1/models" "Browse-summary vLLM"
|
| 64 |
-
export BROWSE_SUMMARY_BASE_URL="${BROWSE_SUMMARY_BASE_URL:-http://127.0.0.1:8003/v1}"
|
| 65 |
-
fi
|
| 66 |
-
|
| 67 |
-
# --- FireRed adapter (GenSearcher /generate contract) ---
|
| 68 |
-
if [[ "${START_FIRERED_API:-1}" == "1" ]]; then
|
| 69 |
-
CUDA_VISIBLE_DEVICES="${FIRERED_CUDA_VISIBLE_DEVICES:-0}" \
|
| 70 |
-
python -m uvicorn services.firered_generate:app --host 0.0.0.0 --port 8765 &
|
| 71 |
-
wait_http "http://127.0.0.1:8765/health" "FireRed API" 120
|
| 72 |
-
export QWEN_EDIT_APP_URL="${QWEN_EDIT_APP_URL:-http://127.0.0.1:8765}"
|
| 73 |
-
else
|
| 74 |
-
echo "[entrypoint] START_FIRERED_API=0 — use external QWEN_EDIT_APP_URL for generation."
|
| 75 |
-
fi
|
| 76 |
-
|
| 77 |
-
exec python app.py
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
set -euo pipefail
|
| 3 |
+
cd /app
|
| 4 |
+
|
| 5 |
+
export PYTHONPATH="/app/vendor/rllm:${PYTHONPATH:-}"
|
| 6 |
+
|
| 7 |
+
# Optional: load Space secrets copied to this path
|
| 8 |
+
if [[ -f /app/.env.gen_image ]]; then
|
| 9 |
+
set -a
|
| 10 |
+
# shellcheck source=/dev/null
|
| 11 |
+
source /app/.env.gen_image
|
| 12 |
+
set +a
|
| 13 |
+
fi
|
| 14 |
+
|
| 15 |
+
wait_http() {
|
| 16 |
+
local url=$1
|
| 17 |
+
local name=$2
|
| 18 |
+
local max_attempts=${3:-90}
|
| 19 |
+
local i=0
|
| 20 |
+
echo "[entrypoint] Waiting for ${name} (${url})..."
|
| 21 |
+
until curl -sf "$url" >/dev/null 2>&1; do
|
| 22 |
+
i=$((i + 1))
|
| 23 |
+
if [[ $i -ge $max_attempts ]]; then
|
| 24 |
+
echo "[entrypoint] Timeout waiting for ${name}"
|
| 25 |
+
exit 1
|
| 26 |
+
fi
|
| 27 |
+
sleep 2
|
| 28 |
+
done
|
| 29 |
+
echo "[entrypoint] ${name} is up."
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
# Defaults: only FireRed + Gradio in-container. Point OPENAI_BASE_URL / BROWSE_SUMMARY_BASE_URL
|
| 33 |
+
# to your vLLM (or other OpenAI-compatible) endpoints via Space secrets.
|
| 34 |
+
|
| 35 |
+
# --- Optional local vLLM: GenSearcher-8B (OpenAI-compatible) ---
|
| 36 |
+
if [[ "${START_VLLM_GENSEARCHER:-0}" == "1" ]]; then
|
| 37 |
+
CUDA_VISIBLE_DEVICES="${GENSEARCHER_CUDA_VISIBLE_DEVICES:-0}" \
|
| 38 |
+
vllm serve "${GENSEARCHER_MODEL_ID:-GenSearcher/Gen-Searcher-8B}" \
|
| 39 |
+
--host 0.0.0.0 \
|
| 40 |
+
--port 8002 \
|
| 41 |
+
--tensor-parallel-size "${GENSEARCHER_TP:-1}" \
|
| 42 |
+
--gpu-memory-utilization "${VLLM_GPU_MEMORY_UTIL:-0.85}" \
|
| 43 |
+
--served-model-name "${GEN_EVAL_MODEL:-Gen-Searcher-8B}" \
|
| 44 |
+
--max-model-len "${GENSEARCHER_MAX_MODEL_LEN:-65536}" \
|
| 45 |
+
--no-enable-prefix-caching &
|
| 46 |
+
wait_http "http://127.0.0.1:8002/v1/models" "GenSearcher vLLM"
|
| 47 |
+
export OPENAI_BASE_URL="${OPENAI_BASE_URL:-http://127.0.0.1:8002/v1}"
|
| 48 |
+
fi
|
| 49 |
+
|
| 50 |
+
# --- Optional local vLLM: browse summarization (Qwen3-VL) ---
|
| 51 |
+
if [[ "${START_VLLM_BROWSE:-0}" == "1" ]]; then
|
| 52 |
+
export BROWSE_GENERATE_ENGINE=vllm
|
| 53 |
+
CUDA_VISIBLE_DEVICES="${BROWSE_CUDA_VISIBLE_DEVICES:-1}" \
|
| 54 |
+
vllm serve "${BROWSE_MODEL_ID:-Qwen/Qwen3-VL-30B-A3B-Instruct}" \
|
| 55 |
+
--host 0.0.0.0 \
|
| 56 |
+
--port 8003 \
|
| 57 |
+
--tensor-parallel-size "${BROWSE_TP:-1}" \
|
| 58 |
+
--gpu-memory-utilization "${VLLM_GPU_MEMORY_UTIL:-0.85}" \
|
| 59 |
+
--served-model-name "${BROWSE_SUMMARY_MODEL:-Qwen3-VL-30B-A3B-Instruct}" \
|
| 60 |
+
--max-model-len "${BROWSE_MAX_MODEL_LEN:-65536}" \
|
| 61 |
+
--mm-processor-cache-gb 0 \
|
| 62 |
+
--no-enable-prefix-caching &
|
| 63 |
+
wait_http "http://127.0.0.1:8003/v1/models" "Browse-summary vLLM"
|
| 64 |
+
export BROWSE_SUMMARY_BASE_URL="${BROWSE_SUMMARY_BASE_URL:-http://127.0.0.1:8003/v1}"
|
| 65 |
+
fi
|
| 66 |
+
|
| 67 |
+
# --- FireRed adapter (GenSearcher /generate contract) ---
|
| 68 |
+
if [[ "${START_FIRERED_API:-1}" == "1" ]]; then
|
| 69 |
+
CUDA_VISIBLE_DEVICES="${FIRERED_CUDA_VISIBLE_DEVICES:-0}" \
|
| 70 |
+
python -m uvicorn services.firered_generate:app --host 0.0.0.0 --port 8765 &
|
| 71 |
+
wait_http "http://127.0.0.1:8765/health" "FireRed API" 120
|
| 72 |
+
export QWEN_EDIT_APP_URL="${QWEN_EDIT_APP_URL:-http://127.0.0.1:8765}"
|
| 73 |
+
else
|
| 74 |
+
echo "[entrypoint] START_FIRERED_API=0 — use external QWEN_EDIT_APP_URL for generation."
|
| 75 |
+
fi
|
| 76 |
+
|
| 77 |
+
exec python app.py
|