Spaces:

msse-team-3
/

ai-engineering-project

Sleeping

GitHub Action

Clean deployment without binary files

f884e6e 6 months ago

3.94 kB

	#!/usr/bin/env bash
	# Startup wrapper for the gunicorn-served application.
	set -euo pipefail

	# Runtime settings, each overridable through the environment.
	PORT_VALUE="${PORT:-8080}"
	TIMEOUT_VALUE="${TIMEOUT:-120}"
	# One worker by default to prevent OOM on low-memory hosts.
	WORKERS_VALUE="${WORKERS:-1}"

	# HuggingFace services are used, so no database initialization runs here.
	printf '%s\n' \
	  "Starting HuggingFace-powered application..." \
	  "Using HF services: Embedding API, Inference API, Dataset storage"

	# HF_TOKEN is optional; only report whether it is present.
	if [[ -z "${HF_TOKEN:-}" ]]; then
	  printf '%s\n' "⚠️ HF_TOKEN not set - some features may be limited"
	else
	  printf '%s\n' "✅ HF_TOKEN configured - HF services enabled"
	fi

	printf '%s\n' "Starting gunicorn on port ${PORT_VALUE} with ${WORKERS_VALUE} workers and timeout ${TIMEOUT_VALUE}s"

	# Put /app first on the import path, keeping any existing entries after it.
	export PYTHONPATH="/app${PYTHONPATH:+:$PYTHONPATH}"

	# Build the gunicorn command line as an array so each argument stays exactly
	# one word, instead of relying on unquoted word-splitting of a string.
	gunicorn_cmd=(
	  gunicorn
	  --bind "0.0.0.0:${PORT_VALUE}"
	  --workers "${WORKERS_VALUE}"
	  --timeout "${TIMEOUT_VALUE}"
	  --log-level info
	  --access-logfile -
	  --error-logfile -
	  --capture-output
	)

	# Add gunicorn.conf.py from the current directory when it exists.
	if [[ -f gunicorn.conf.py ]]; then
	  gunicorn_cmd+=(--config gunicorn.conf.py)
	else
	  echo "Warning: gunicorn.conf.py not found; starting with inline CLI options only."
	fi
	gunicorn_cmd+=(app:app)

	# Start gunicorn in background so we can trap signals and collect diagnostics
	"${gunicorn_cmd[@]}" &

	GUNICORN_PID=$!

	#######################################
	# Signal handler: print memory diagnostics, forward SIGTERM to gunicorn,
	# wait for it to stop, then exit the wrapper with status 0.
	# Globals:   GUNICORN_PID (read)
	# Arguments: $1 - name of the received signal, used only in the log line
	#                 (optional, defaults to TERM)
	# Outputs:   diagnostics and progress messages on stdout
	#######################################
	handle_term() {
	  local sig="${1:-TERM}"
	  echo "===== SIG${sig} received at $(date -u +'%Y-%m-%dT%H:%M:%SZ') ====="
	  echo "--- Top processes by RSS ---"
	  # Diagnostics are best-effort; a failing ps/head must not stop the shutdown.
	  ps aux --sort=-rss | head -n 20 || true
	  echo "--- /proc/meminfo (if available) ---"
	  cat /proc/meminfo || true
	  echo "Forwarding SIGTERM to gunicorn (pid ${GUNICORN_PID})"
	  # gunicorn may already be gone, so a failed kill is ignored on purpose.
	  kill -TERM "${GUNICORN_PID}" 2>/dev/null || true
	  # Wait for gunicorn to exit; its status is deliberately discarded.
	  wait "${GUNICORN_PID}" || true
	  echo "Gunicorn exited; wrapper exiting"
	  exit 0
	}
	# Separate traps so the log line names the signal that actually arrived.
	trap 'handle_term TERM' TERM
	trap 'handle_term INT' INT

	# Readiness probe loop: poll the /health endpoint until it answers, the wait
	# budget is used up, or gunicorn exits.
	echo "Waiting for application readiness (health endpoint)..."
	READY_TIMEOUT="${READY_TIMEOUT:-60}" # total seconds to wait
	READY_INTERVAL="${READY_INTERVAL:-3}" # seconds between checks
	READY_PROBE_TIMEOUT="${READY_PROBE_TIMEOUT:-5}" # max seconds per curl probe

	# Non-numeric values would break the arithmetic below, and an interval of 0
	# would never advance ELAPSED, so fall back to the defaults in those cases.
	# 10# forces base 10 so values such as "08" are not parsed as octal.
	if [[ "${READY_TIMEOUT}" =~ ^[0-9]+$ ]]; then
	  READY_TIMEOUT=$((10#${READY_TIMEOUT}))
	else
	  echo "Invalid READY_TIMEOUT '${READY_TIMEOUT}'; using 60" >&2
	  READY_TIMEOUT=60
	fi
	if [[ "${READY_INTERVAL}" =~ ^[0-9]+$ ]] && ((10#${READY_INTERVAL} > 0)); then
	  READY_INTERVAL=$((10#${READY_INTERVAL}))
	else
	  echo "Invalid READY_INTERVAL '${READY_INTERVAL}'; using 3" >&2
	  READY_INTERVAL=3
	fi

	# ELAPSED counts only the sleep intervals, not the time spent inside curl.
	ELAPSED=0
	READY=0
	while [ "$ELAPSED" -lt "$READY_TIMEOUT" ]; do
	  # kill -0 sends no signal; it only checks that the process still exists.
	  if ! kill -0 "${GUNICORN_PID}" 2>/dev/null; then
	    echo "Gunicorn process exited prematurely during startup; aborting." >&2
	    exit 1
	  fi
	  # --max-time bounds each probe so a hung connection cannot stall the loop.
	  if curl -fsS --max-time "${READY_PROBE_TIMEOUT}" \
	    "http://localhost:${PORT_VALUE}/health" >/dev/null 2>&1; then
	    READY=1
	    break
	  fi
	  sleep "$READY_INTERVAL"
	  ELAPSED=$((ELAPSED + READY_INTERVAL))
	done
	if [ "$READY" -ne 1 ]; then
	  echo "Health endpoint not ready after ${READY_TIMEOUT}s; continuing but marking as degraded." >&2
	fi

	# Pre-warm the /chat endpoint with one request. This runs whether or not the
	# health probe succeeded; a failure is only logged and startup continues.
	echo "Pre-warming application via /chat endpoint..."
	if ! curl -sS --fail --max-time 30 \
	  -X POST "http://localhost:${PORT_VALUE}/chat" \
	  -H "Content-Type: application/json" \
	  -d '{"message":"pre-warm"}' \
	  >/dev/null 2>&1; then
	  echo "Pre-warm request failed but continuing..."
	fi

	# Explicit embedding warm-up to surface ONNX model issues early.
	# The Python snippet exits with status 1 on any exception, which selects the
	# failure branch below.
	# NOTE: the heredoc uses <<- so Bash strips the leading TAB from each line and
	# from the PY terminator. Keep those as tabs; the Python block indentation
	# after the tab must stay as spaces.
	echo "Running embedding warm-up..."
	if python - <<-'PY'
	import time
	from src.embedding.embedding_service import EmbeddingService
	start = time.time()
	try:
	    svc = EmbeddingService()
	    emb = svc.embed_text("warmup")
	    dur = (time.time() - start) * 1000
	    print(f"Embedding warm-up successful; dim={len(emb)}; duration_ms={dur:.1f}")
	except Exception as e:
	    dur = (time.time() - start) * 1000
	    print(f"Embedding warm-up FAILED after {dur:.1f}ms: {e}")
	    raise SystemExit(1)
	PY
	then
	  echo "Embedding warm-up succeeded."
	else
	  echo "Embedding warm-up failed; terminating startup to allow redeploy/retry." >&2
	  # Stop gunicorn before exiting; it may already be gone, so errors are ignored.
	  kill -TERM "${GUNICORN_PID}" 2>/dev/null || true
	  wait "${GUNICORN_PID}" || true
	  exit 1
	fi

	echo "Server is running (PID ${GUNICORN_PID})."

	# Wait for gunicorn to exit and forward its exit code.
	# 'wait' sits on the left of '||' so that set -e does not abort the script on
	# a non-zero status before the code is captured and logged.
	EXIT_CODE=0
	wait "${GUNICORN_PID}" || EXIT_CODE=$?
	echo "Gunicorn stopped with exit code ${EXIT_CODE}"
	exit "${EXIT_CODE}"