Spaces:

cloudfom
/

audience_v1

Running

App Files Files Community

audience_v1 / Dockerfile

fomext

Upload 3 files

c83a172 verified 2 months ago

Raw

History Blame Contribute Delete

3.82 kB

	# ─────────────────────────────────────────────────────────────────────────────
	# Dockerfile — Groq Audience Proxy (HuggingFace Space)
	#
	# Build & run locally:
	# docker build -t groq-proxy .
	# docker run -p 7860:7860 -e GROQ_API_KEY=gsk_... groq-proxy
	#
	# On HuggingFace Spaces:
	# 1. Push this file, requirements.txt and groq_proxy_service.py to the Space repo root.
	# 2. Set GROQ_API_KEY as an HF Secret in the Space settings.
	# (Never hardcode the key — the Space injects it at runtime.)
	# 3. HF will auto-detect this Dockerfile and build the Space.
	#
	# The service exposes port 7860 (HuggingFace's standard public port).
	# Endpoint: POST /estimate_audience — used by music_chart_server's
	# _AudienceSlotPool._call_hf_space()
	# ─────────────────────────────────────────────────────────────────────────────

	# Base image: the "slim" variant of the Python 3.11 image (apt-based, see the RUN below).
	FROM python:3.11-slim

	# ── System deps ───────────────────────────────────────────────────────────────
	# curl is the binary the HEALTHCHECK instruction invokes. build-essential
	# presumably lets pip compile dependencies that ship without wheels —
	# TODO confirm it is still required.
	# The apt package lists are removed in the same layer so they never reach the image.
	RUN apt-get update && \
	    apt-get install --yes --no-install-recommends \
	        build-essential \
	        curl && \
	    rm -rf /var/lib/apt/lists/*

	# ── Working directory ─────────────────────────────────────────────────────────
	# Relative paths in the later COPY, RUN and CMD instructions resolve against /app.
	WORKDIR /app

	# ── Python dependencies ───────────────────────────────────────────────────────
	# requirements.txt is copied on its own, ahead of the application code, so the
	# pip layer is rebuilt only when the requirements change.
	COPY requirements.txt ./
	RUN python -m pip install --no-cache-dir --requirement requirements.txt

	# ── Non-root user (HuggingFace Spaces requirement) ───────────────────────────
	# The user is created before the application code is copied so this layer stays
	# cached when only the code changes. /app (created by WORKDIR as root) is handed
	# to the user so the process can write next to its code if it needs to.
	RUN useradd -m -u 1000 hfuser \
	    && chown hfuser /app

	# ── Application code ──────────────────────────────────────────────────────────
	# --chown makes the file owned by the runtime user instead of root, so it stays
	# readable regardless of the permission bits it had in the build context.
	COPY --chown=hfuser groq_proxy_service.py .

	# Everything from here on (including the running service) executes as uid 1000.
	USER hfuser

	# ── Runtime configuration ─────────────────────────────────────────────────────
	# GROQ_API_KEY        — injected as an HF Secret; never set here
	# AUDIENCE_GROQ_MODEL — Groq model name (default: llama-3.1-8b-instant)
	#                       NOTE(review): this variable has also been documented as
	#                       GROQ_MODEL — confirm which name groq_proxy_service.py reads.
	# MAX_TOKENS          — max tokens in Groq response (default: 256)
	# PORT                — HTTP port (default: 7860). The uvicorn listening port is
	#                       fixed by "--port 7860" in CMD, so overriding PORT alone
	#                       does not move the listener.
	# PYTHONUNBUFFERED    — disables Python's stdout/stderr buffering so log lines
	#                       show up immediately
	# SLOT_INTERVAL       — seconds between Groq calls (default: 1.0 → 60 RPM)

	ENV AUDIENCE_GROQ_MODEL=llama-3.1-8b-instant \
	    MAX_TOKENS=256 \
	    PORT=7860 \
	    PYTHONUNBUFFERED=1 \
	    SLOT_INTERVAL=1.0

	# Documents the port uvicorn is told to bind in CMD; EXPOSE does not publish it by itself.
	EXPOSE 7860

	# ── Health check ──────────────────────────────────────────────────────────────
	# Probes GET /health on the port uvicorn binds (7860) using the curl binary
	# installed with the system deps.
	#   -f   makes curl fail on HTTP status >= 400
	#   -sS  hides the progress meter but still prints errors
	# "|| exit 1" maps every curl failure to exit status 1, the value Docker
	# interprets as "unhealthy".
	HEALTHCHECK --interval=30s --timeout=10s --start-period=15s --retries=3 \
	    CMD curl -fsS http://localhost:7860/health || exit 1

	# ── Entrypoint ────────────────────────────────────────────────────────────────
	# Exec (JSON-array) form: Python is started directly, without a wrapping shell.
	# Runs uvicorn as a module and serves the `app` object from groq_proxy_service
	# on all interfaces, port 7860.
	CMD [ \
	    "python", "-m", "uvicorn", \
	    "groq_proxy_service:app", \
	    "--host", "0.0.0.0", \
	    "--port", "7860", \
	    "--log-level", "info" \
	]