# ──────────────────────────────────────────────────────────────────────────
# Dockerfile – Children's Learning Router Service
# Target: Hugging Face Spaces (CPU-only, Docker SDK)
# Port:   7860 (required by HF Spaces)
#
# Model delivery: via `preload_from_hub` in README.md
#   HF Spaces downloads Qwen/Qwen2.5-1.5B-Instruct before container start
#   and places it under /repo-cache (HF_HOME=/repo-cache).
#   This Dockerfile therefore performs no model download of its own.
#   (The build itself still needs network access: the apt and pip steps
#   below fetch packages from their upstream indexes.)
#
# OOM mitigation: packages are installed in small isolated groups so pip's
#   dependency resolver never spikes RAM. --no-cache-dir and --no-compile
#   keep peak memory low throughout the build.
# ──────────────────────────────────────────────────────────────────────────

FROM python:3.10-slim

# ── System packages ───────────────────────────────────────────────────────
# Package list is kept alphabetical for clean diffs; the apt lists are removed
# in the same layer so they never reach the image.
# NOTE(review): build-essential is only needed if a pinned Python package has
# no prebuilt wheel for this platform, and nothing in this file invokes curl
# or git directly — confirm against app.py before trimming.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# ── Working directory ─────────────────────────────────────────────────────
WORKDIR /app

# ── Pip hygiene: upgrade pip/wheel first (small, fast) ───────────────────
RUN pip install --no-cache-dir --no-compile --upgrade pip wheel

# ── 1 of 4 · CPU-only PyTorch (largest wheel – install alone) ────────────
# The dedicated index serves the CPU-only build of torch.
RUN pip install --no-cache-dir --no-compile \
        torch==2.3.1 \
        --index-url https://download.pytorch.org/whl/cpu

# ── 2 of 4 · HuggingFace stack (transformers pulls in tokenizers etc.) ───
RUN pip install --no-cache-dir --no-compile \
        transformers==4.46.3 \
        accelerate==1.1.1

# ── 3 of 4 · Serialisation libs ──────────────────────────────────────────
RUN pip install --no-cache-dir --no-compile \
        sentencepiece==0.2.0 \
        protobuf==5.28.3

# ── 4 of 4 · Async HTTP client + Web framework + ASGI server ─────────────
# The extras spec is quoted so the shell never treats [standard] as a glob
# pattern.
RUN pip install --no-cache-dir --no-compile \
        httpx==0.27.2 \
        fastapi==0.115.0 \
        "uvicorn[standard]==0.30.6"

# ── Application code ──────────────────────────────────────────────────────
# Copied last so editing app.py does not invalidate the dependency layers.
COPY app.py .
# ── HuggingFace Spaces: run as non-root user (UID 1000) ──────────────────
# mkdir -p /repo-cache/hub ensures the cache path exists and is writable
# by hfuser whether HF Spaces pre-populates it or the model downloads fresh.
RUN useradd -m -u 1000 hfuser \
    && mkdir -p /repo-cache/hub \
    && chown -R hfuser:hfuser /app /repo-cache

# Everything from here on (including the running service) executes as hfuser.
USER hfuser

# ── Runtime config ────────────────────────────────────────────────────────
# HF Spaces sets HF_HOME=/repo-cache and places preload_from_hub models
# there before the container starts. HF_HOME alone is sufficient;
# TRANSFORMERS_CACHE is deprecated in transformers v4 and slated for removal
# in v5.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    HF_HOME=/repo-cache

# Documentation only: 7860 is the port the uvicorn command below binds to.
EXPOSE 7860

# ── Start-up command ──────────────────────────────────────────────────────
# Plain uvicorn — no gunicorn shim. Eliminates the gunicorn health-check
# race that was killing the worker mid-response and causing 502s.
# --timeout-keep-alive 300 keeps idle keep-alive connections open for up to
# 300 s between requests. It is an idle-connection timeout, not a limit on
# how long an in-flight request may run, so it does not by itself bound CPU
# inference time for the 1.5B model.
CMD ["uvicorn", "app:app", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--timeout-keep-alive", "300", \
     "--log-level", "info"]