# Container image for the multimodal affect-screening app.
# Works locally (docker run) and on Hugging Face Spaces (Docker SDK, port 7860).
FROM python:3.11-slim

# System libs: ffmpeg (decode browser webm/ogg audio), libGL + glib (OpenCV).
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ffmpeg \
        libgl1 \
        libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# Runtime configuration.
#   HF_HOME  - where the Hugging Face libraries cache downloaded models.
#   PORT     - port gunicorn binds to; overridable at `docker run -e PORT=...`.
ENV PYTHONUNBUFFERED=1 \
    HF_HOME=/app/hf_cache \
    HF_HUB_DISABLE_TELEMETRY=1 \
    OMP_NUM_THREADS=2 \
    PORT=7860

WORKDIR /app

# Unprivileged runtime user. UID 1000 matches the user Hugging Face Spaces runs
# Docker containers as, so the model cache under /app stays writable there.
RUN useradd --create-home --uid 1000 app \
    && mkdir -p /app/hf_cache \
    && chown -R app:app /app

# CPU-only PyTorch first (small wheel), then the rest of the deps.
# Installed as root into the system site-packages, before dropping privileges.
RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
COPY requirements-modern.txt .
RUN pip install --no-cache-dir -r requirements-modern.txt

# Everything from here on runs as the unprivileged user, so the files written
# into HF_HOME below are owned by the same account that reads them at runtime.
USER app

# Pre-download the Transformer models into the image so first request is fast.
RUN python -c "from transformers import pipeline; \
pipeline('image-classification', model='trpakov/vit-face-expression'); \
pipeline('audio-classification', model='superb/wav2vec2-base-superb-er')"

# App code (models/ holds only the Haar cascade now).
# Copied last so source edits do not invalidate the dependency/model layers.
COPY --chown=app:app . .

EXPOSE 7860

# One worker (models load lazily per process); threads handle light concurrency.
# `sh -c` is needed to expand ${PORT}; `exec` replaces the shell so gunicorn is
# PID 1 and receives SIGTERM directly on `docker stop`.
CMD ["sh", "-c", "exec gunicorn --bind 0.0.0.0:${PORT:-7860} --workers 1 --threads 2 --timeout 300 app:app"]