# ──────────────────────────────────────────────────────────────────────────
# Dockerfile — Children's Learning Router Service
# Target: Hugging Face Spaces (CPU-only, Docker SDK)
# Port: 7860 (required by HF Spaces)
#
# Model delivery: via `preload_from_hub` in README.md
# HF Spaces downloads Qwen/Qwen2.5-1.5B-Instruct before container start
# and places it under /repo-cache (HF_HOME=/repo-cache).
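# No in-build model download is performed by this Dockerfile.
# NOTE(review): the original note said the build env has no internet, but the
# apt-get and pip steps below need network access at build time — confirm.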
#
# OOM mitigation: packages are installed in small isolated groups so pip's
# dependency resolver never spikes RAM. --no-cache-dir and --no-compile
# keep peak memory low throughout the build.
# ──────────────────────────────────────────────────────────────────────────
FROM python:3.10-slim

# ── System packages ───────────────────────────────────────────────────────
# Installs the compiler toolchain plus curl and git without recommended
# extras. The apt package lists are deleted in the same layer so they do
# not persist in the image.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        build-essential \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*
# ── Working directory ─────────────────────────────────────────────────────
# Working directory for the remaining build instructions and for the
# container process at runtime.
WORKDIR /app

# ── Pip hygiene ───────────────────────────────────────────────────────────
# Upgrade pip and wheel before the pinned dependency groups are installed.
RUN pip install \
        --upgrade \
        --no-compile \
        --no-cache-dir \
        pip wheel
# ── 1 of 4 · CPU-only PyTorch ─────────────────────────────────────────────
# Largest wheel, so it gets a layer of its own. It is resolved against the
# PyTorch CPU wheel index instead of the default package index.
RUN pip install \
        --index-url https://download.pytorch.org/whl/cpu \
        --no-compile \
        --no-cache-dir \
        torch==2.3.1
# ── 2 of 4 · HuggingFace stack ────────────────────────────────────────────
# transformers brings in its own dependencies (tokenizers etc.); both
# packages are pinned to exact versions.
RUN pip install \
        --no-compile \
        --no-cache-dir \
        accelerate==1.1.1 \
        transformers==4.46.3

# ── 3 of 4 · Serialisation libs ───────────────────────────────────────────
# Pinned separately from the HuggingFace stack to keep each install small.
RUN pip install \
        --no-compile \
        --no-cache-dir \
        protobuf==5.28.3 \
        sentencepiece==0.2.0
# ── 4 of 4 · Async HTTP client + Web framework + ASGI server ──────────────
# The uvicorn requirement is quoted because RUN passes the line to /bin/sh,
# where an unquoted "[standard]" is a glob character class that could match
# a file in the working directory instead of naming the pip extra.
RUN pip install --no-cache-dir --no-compile \
        httpx==0.27.2 \
        fastapi==0.115.0 \
        "uvicorn[standard]==0.30.6"
# ── HuggingFace Spaces: run as non-root user (UID 1000) ───────────────────
# The user is created before the application is copied, so editing app.py
# does not invalidate this layer.
# mkdir -p /repo-cache/hub ensures the cache path exists; the chown makes
# /app and /repo-cache owned by hfuser so both are writable at runtime.
RUN useradd -m -u 1000 hfuser \
    && mkdir -p /repo-cache/hub \
    && chown -R hfuser:hfuser /app /repo-cache

# ── Application code ──────────────────────────────────────────────────────
# --chown sets ownership at copy time, so no follow-up chown layer has to
# rewrite the file.
COPY --chown=hfuser:hfuser app.py .

# Everything from here on, including the container process, runs as hfuser.
USER hfuser
# ── Runtime config ────────────────────────────────────────────────────────
# HF_HOME points the Hugging Face cache root at /repo-cache, the directory
# created and chowned for hfuser earlier in this file. TRANSFORMERS_CACHE is
# intentionally not set; HF_HOME alone is relied on.
# PYTHONDONTWRITEBYTECODE stops Python writing .pyc files and
# PYTHONUNBUFFERED keeps stdout/stderr unbuffered so logs appear immediately.
ENV HF_HOME=/repo-cache \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Documents the port the service listens on (see --port in CMD); the header
# of this file notes that HF Spaces requires 7860.
EXPOSE 7860
# ── Start-up command ──────────────────────────────────────────────────────
# Plain uvicorn in exec (JSON-array) form, with no gunicorn shim. Per the
# original author's note, this avoids a gunicorn health-check race that was
# killing the worker mid-response and causing 502s.
# The array must end at the closing bracket: any trailing text makes it
# invalid JSON and Docker falls back to shell form.
# --timeout-keep-alive 300 keeps idle keep-alive connections open for up to
# 300 s between requests; it does not bound how long a single inference
# request may run.
# NOTE(review): this comment previously referred to a "3B model", while the
# file header names Qwen/Qwen2.5-1.5B-Instruct — confirm which model app.py
# actually loads.
CMD ["uvicorn", "app:app", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--timeout-keep-alive", "300", \
     "--log-level", "info"]