# GapGuide backend — Hugging Face Docker Space image.
# Django + DRF API with the in-process NER chain, served by gunicorn on :7860.
FROM python:3.13-slim

# Unbuffered stdout/stderr, no .pyc files, no pip wheel cache. The Hugging Face
# and NLTK caches are pointed under /app/.cache so the build steps below can
# bake their downloads into the image.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    HF_HOME=/app/.cache/huggingface \
    NLTK_DATA=/app/.cache/nltk

# HF Spaces run the container as UID 1000.
RUN useradd -m -u 1000 appuser

WORKDIR /app

# Python deps first for better layer caching. requirements.txt carries the
# torch CPU --extra-index-url, so CPU wheels resolve here (author's note:
# torch 2.11/2.12 +cpu cp313 manylinux wheels were verified to exist).
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt \
    && python -m spacy download en_core_web_sm

# Create the directories later steps write to and hand them to UID 1000.
# /app itself is root-owned from WORKDIR; chown only these specific paths
# rather than the whole tree (no recursive-chown layer bloat).
RUN mkdir -p /app/.cache/huggingface /app/.cache/nltk /app/staticfiles \
    && chown 1000:1000 /app /app/.cache /app/.cache/huggingface /app/.cache/nltk /app/staticfiles

# Everything from here on runs as the runtime user, so baked caches land in
# the owned /app/.cache with the right ownership.
USER 1000

# NLTK data needs only the installed packages, not the app code. Fetching it
# BEFORE the app-code COPY keeps this layer cached across source-only changes
# instead of re-downloading the corpora on every code edit.
RUN python -m nltk.downloader -d /app/.cache/nltk stopwords punkt punkt_tab

# App code, owned by the runtime user in a single COPY.
COPY --chown=1000:1000 . .

# Bake what the NER chain downloads → fast, offline-safe first parse in prod.
# Must follow the COPY above because the script lives in the build context.
RUN python scripts/prefetch_models.py

# Collect Django admin / DRF static for WhiteNoise. Inline build env sidesteps the
# DEBUG=False "insecure SECRET_KEY" guard; it does not persist to runtime.
RUN DEBUG=True SECRET_KEY=build-only python manage.py collectstatic --noinput

EXPOSE 7860

# /bin/sh form survives a Windows upload (no exec-bit / shebang dependency).
# The script path is relative to WORKDIR (/app).
ENTRYPOINT ["/bin/sh", "entrypoint.sh"]