Spaces:

JSCPPProgrammer
/

gensearcher-firered

Paused

App Files Files Community

gensearcher-firered / scripts /entrypoint_body.sh

JSCPPProgrammer

Fix HF exec entrypoint: generate entrypoint.sh in Docker; body in entrypoint_body.sh

c994fd2 verified 2 months ago

raw

history blame contribute delete

4.03 kB

	# Container startup body; invoke as: bash /app/scripts/entrypoint_body.sh
	# The entrypoint.sh wrapper is generated in the Dockerfile so HF can exec it
	# without CRLF/BOM issues.
	set -euo pipefail
	cd /app

	# vLLM runs in this same container: in some runtimes PyTorch may call
	# getpass.getuser() before USER is set, so fall back to fixed defaults for
	# anything that is unset or empty, then export the lot.
	: "${USER:=huggingface}"
	: "${LOGNAME:=$USER}"
	: "${TORCHINDUCTOR_CACHE_DIR:=/tmp/torch_inductor_cache}"
	: "${TRITON_CACHE_DIR:=/tmp/triton_cache}"
	export USER LOGNAME TORCHINDUCTOR_CACHE_DIR TRITON_CACHE_DIR

	# Put the vendored rllm directory ahead of any inherited PYTHONPATH entries.
	PYTHONPATH="/app/vendor/rllm:${PYTHONPATH:-}"
	export PYTHONPATH

	# Optional: Space secrets copied to /app/.env.gen_image. While allexport
	# (set -a) is on, every variable the file assigns is exported automatically.
	if [[ -f /app/.env.gen_image ]]; then
		set -a
		# shellcheck source=/dev/null
		. /app/.env.gen_image
		set +a
	fi

	#######################################
	# Print startup hints when no in-container GenSearcher vLLM was requested
	# (START_VLLM_GENSEARCHER is not "1") and OPENAI_BASE_URL is empty or
	# points at a loopback address.
	# Globals:
	#   START_VLLM_GENSEARCHER (read), OPENAI_BASE_URL (read)
	# Outputs:
	#   Advisory lines on stdout.
	# Returns:
	#   Always 0; this check is informational and never aborts startup.
	#######################################
	warn_on_unusable_openai_base_url() {
		if [[ "${START_VLLM_GENSEARCHER:-0}" == "1" ]]; then
			return 0
		fi
		case "${OPENAI_BASE_URL:-}" in
		"")
			echo "[entrypoint] OPENAI_BASE_URL is unset. For GenSearcher inside this Space only, set Space variable"
			echo "[entrypoint] START_VLLM_GENSEARCHER=1 (entrypoint will start vLLM here and set OPENAI_BASE_URL to loopback)."
			;;
		# Substring globs joined by an unescaped '|'. An escaped '\|' would be a
		# literal pipe character, and a pattern without '*' would have to equal
		# the whole URL, so neither form could ever match a real endpoint.
		*127.0.0.1* | *localhost*)
			echo "[entrypoint] WARNING: OPENAI_BASE_URL points to loopback but START_VLLM_GENSEARCHER is not 1."
			echo "[entrypoint] The GenSearcher agent will get 'Connection error' unless a server listens here,"
			echo "[entrypoint] or you set OPENAI_BASE_URL to an external OpenAI-compatible URL (ending in /v1)."
			;;
		esac
	}
	warn_on_unusable_openai_base_url

	#######################################
	# Poll an HTTP endpoint until a probe succeeds, or give up.
	# Globals:
	#   WAIT_HTTP_PROBE_TIMEOUT (read, optional) - per-request limit in seconds
	#     passed to curl --max-time; defaults to 10.
	# Arguments:
	#   $1 - URL to probe
	#   $2 - human-readable service name used in the log lines
	#   $3 - number of failed probes after which to give up (default 90)
	# Outputs:
	#   Progress lines on stdout.
	# Returns:
	#   0 once a probe succeeds. On timeout it calls `exit 1`, which terminates
	#   the calling shell instead of returning.
	#######################################
	wait_http() {
		local url=$1
		local name=$2
		local max_attempts=${3:-90}
		local probe_timeout=${WAIT_HTTP_PROBE_TIMEOUT:-10}
		local i=0
		echo "[entrypoint] Waiting for ${name} (${url})..."
		# --max-time bounds every probe. Without it, a server that accepts the
		# connection but never replies would block curl indefinitely and the
		# attempt limit below would never be reached.
		until curl -sf --max-time "$probe_timeout" "$url" >/dev/null 2>&1; do
			i=$((i + 1))
			if [[ $i -ge $max_attempts ]]; then
				echo "[entrypoint] Timeout waiting for ${name}"
				exit 1
			fi
			# Fixed pause between probes.
			sleep 2
		done
		echo "[entrypoint] ${name} is up."
	}

	# By default only FireRed and the Gradio app run inside this container. Use
	# Space secrets to point OPENAI_BASE_URL / BROWSE_SUMMARY_BASE_URL at your
	# vLLM (or any other OpenAI-compatible) endpoints.

	# --- Optional in-container vLLM serving GenSearcher-8B (OpenAI-compatible) ---
	case "${START_VLLM_GENSEARCHER:-0}" in
	1)
		# Assemble argv in an array so each option stays a separate word.
		gensearcher_serve=(
			vllm serve "${GENSEARCHER_MODEL_ID:-GenSearcher/Gen-Searcher-8B}"
			--host 0.0.0.0
			--port 8002
			--tensor-parallel-size "${GENSEARCHER_TP:-1}"
			--gpu-memory-utilization "${VLLM_GPU_MEMORY_UTIL:-0.85}"
			--served-model-name "${GEN_EVAL_MODEL:-Gen-Searcher-8B}"
			--max-model-len "${GENSEARCHER_MAX_MODEL_LEN:-65536}"
			--no-enable-prefix-caching
		)
		# The device selection applies to this server process only.
		CUDA_VISIBLE_DEVICES="${GENSEARCHER_CUDA_VISIBLE_DEVICES:-0}" "${gensearcher_serve[@]}" &
		wait_http "http://127.0.0.1:8002/v1/models" "GenSearcher vLLM"
		# An OPENAI_BASE_URL that is already set wins; otherwise use the local server.
		export OPENAI_BASE_URL="${OPENAI_BASE_URL:-http://127.0.0.1:8002/v1}"
		;;
	esac

	# --- Optional in-container vLLM for browse summarization (Qwen3-VL) ---
	case "${START_VLLM_BROWSE:-0}" in
	1)
		# Exported before the launch, so the server process inherits it as well.
		export BROWSE_GENERATE_ENGINE=vllm
		# Assemble argv in an array so each option stays a separate word.
		browse_serve=(
			vllm serve "${BROWSE_MODEL_ID:-Qwen/Qwen3-VL-30B-A3B-Instruct}"
			--host 0.0.0.0
			--port 8003
			--tensor-parallel-size "${BROWSE_TP:-1}"
			--gpu-memory-utilization "${VLLM_GPU_MEMORY_UTIL:-0.85}"
			--served-model-name "${BROWSE_SUMMARY_MODEL:-Qwen3-VL-30B-A3B-Instruct}"
			--max-model-len "${BROWSE_MAX_MODEL_LEN:-65536}"
			--mm-processor-cache-gb 0
			--no-enable-prefix-caching
		)
		# The device selection applies to this server process only.
		CUDA_VISIBLE_DEVICES="${BROWSE_CUDA_VISIBLE_DEVICES:-1}" "${browse_serve[@]}" &
		wait_http "http://127.0.0.1:8003/v1/models" "Browse-summary vLLM"
		# A BROWSE_SUMMARY_BASE_URL that is already set wins; otherwise use the local server.
		export BROWSE_SUMMARY_BASE_URL="${BROWSE_SUMMARY_BASE_URL:-http://127.0.0.1:8003/v1}"
		;;
	esac

	# --- FireRed adapter (GenSearcher /generate contract) ---
	# Started unless START_FIRERED_API is set to something other than "1".
	if [[ "${START_FIRERED_API:-1}" != "1" ]]; then
		echo "[entrypoint] START_FIRERED_API=0 — use external QWEN_EDIT_APP_URL for generation."
	else
		firered_server=(python -m uvicorn services.firered_generate:app --host 0.0.0.0 --port 8765)
		# The device selection applies to this server process only.
		CUDA_VISIBLE_DEVICES="${FIRERED_CUDA_VISIBLE_DEVICES:-0}" "${firered_server[@]}" &
		# Allow up to 120 failed probes here instead of wait_http's default of 90.
		wait_http "http://127.0.0.1:8765/health" "FireRed API" 120
		# A QWEN_EDIT_APP_URL that is already set wins; otherwise use the local adapter.
		export QWEN_EDIT_APP_URL="${QWEN_EDIT_APP_URL:-http://127.0.0.1:8765}"
	fi

	# Replace this shell with the application process.
	exec python app.py