# Container image for the multimodal affect-screening app.
# Works locally (docker run) and on Hugging Face Spaces (Docker SDK, port 7860).
FROM python:3.11-slim

# Native runtime libraries, one per line for clean diffs:
#   ffmpeg        - decodes the webm/ogg audio that browsers upload
#   libgl1        - libGL, required by OpenCV
#   libglib2.0-0  - glib, required by OpenCV
# The apt package lists are removed in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        ffmpeg \
        libgl1 \
        libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# Environment shared by the remaining build steps and the running container:
#   HF_HOME                   - Hugging Face cache directory (kept under /app)
#   HF_HUB_DISABLE_TELEMETRY  - opt out of Hugging Face Hub telemetry
#   OMP_NUM_THREADS           - cap OpenMP threads at 2
#   PORT                      - default listen port, read by the start command
#   PYTHONUNBUFFERED          - unbuffered Python stdout/stderr so logs show immediately
ENV HF_HOME=/app/hf_cache \
    HF_HUB_DISABLE_TELEMETRY=1 \
    OMP_NUM_THREADS=2 \
    PORT=7860 \
    PYTHONUNBUFFERED=1

WORKDIR /app

# CPU-only PyTorch first (small wheel), then the rest of the deps.
# Installed system-wide as root so the packages are read-only for the runtime user.
RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
COPY requirements-modern.txt .
RUN pip install --no-cache-dir -r requirements-modern.txt

# Unprivileged runtime account. UID/GID 1000 matches the user Hugging Face Spaces
# runs Docker containers as, so files created below stay writable there, and a
# local `docker run` no longer executes the web app as root.
# /app (created root-owned by WORKDIR) is handed to that account so it can
# create the model cache and any runtime files underneath it.
RUN groupadd --gid 1000 appuser \
    && useradd --uid 1000 --gid 1000 --create-home --shell /usr/sbin/nologin appuser \
    && chown appuser:appuser /app
USER appuser

# Pre-download the Transformer models into the image so first request is fast.
# Runs as appuser so the cache directory is owned by the runtime user rather
# than root.
RUN python -c "from transformers import pipeline; \
    pipeline('image-classification', model='trpakov/vit-face-expression'); \
    pipeline('audio-classification', model='superb/wav2vec2-base-superb-er')"

# App code (models/ holds only the Haar cascade now).
# Copied last so source edits do not invalidate the dependency/model layers.
COPY --chown=appuser:appuser . .

# Documents the default port only; the port actually bound follows $PORT.
EXPOSE 7860

# One worker (models load lazily per process); threads handle light concurrency.
# A shell is needed to expand ${PORT:-7860}; `exec` then replaces that shell with
# gunicorn so it runs as PID 1 and receives SIGTERM from `docker stop` directly,
# allowing a graceful shutdown instead of a kill after the stop timeout.
CMD ["sh", "-c", "exec gunicorn --bind 0.0.0.0:${PORT:-7860} --workers 1 --threads 2 --timeout 300 app:app"]