FROM python:3.11-slim

# Build-time only: keeps apt/debconf from prompting during the RUN steps of
# this stage without leaking the setting into the running container's env.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime configuration.
#   PYTHONUNBUFFERED / PIP_NO_CACHE_DIR : unbuffered logs, no pip wheel cache.
#   HF_HOME / XDG_CACHE_HOME / TRANSFORMERS_CACHE / NLTK_DATA :
#       cache and data locations, all under /home/user.
#   WHISPER_MODEL / WHISPER_DEVICE / SPACY_MODEL :
#       model selection exposed to the application as environment variables.
# NOTE(review): TRANSFORMERS_CACHE points at the "hub" directory under HF_HOME;
# it may be redundant with HF_HOME on current library versions - confirm before
# removing.
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    HF_HOME=/home/user/.cache/huggingface \
    XDG_CACHE_HOME=/home/user/.cache \
    TRANSFORMERS_CACHE=/home/user/.cache/huggingface/hub \
    NLTK_DATA=/home/user/nltk_data \
    WHISPER_MODEL=large-v3 \
    WHISPER_DEVICE=cpu \
    SPACY_MODEL=en_core_web_trf

# OS packages: audio decoding (ffmpeg, libsndfile1), a compiler toolchain for
# Python packages that build from source, plus curl and git.
# curl is also what the HEALTHCHECK in this file invokes.
# The apt package lists are removed in the same layer to keep it small.
RUN apt-get update \
 && apt-get install --yes --no-install-recommends \
        build-essential \
        curl \
        ffmpeg \
        git \
        libsndfile1 \
 && rm -rf /var/lib/apt/lists/*

# HF Spaces runs the container as UID 1000, so create a matching account.
# The app directory is created here, while still root, with explicit
# user:user ownership: later steps run as the unprivileged user and write
# into it, and relying on WORKDIR to create it leaves the ownership up to the
# builder implementation.
RUN useradd -m -u 1000 user \
 && install -d -o user -g user /home/user/app
USER user
# `pip install --user` places console scripts in ~/.local/bin.
ENV PATH="/home/user/.local/bin:${PATH}"
WORKDIR /home/user/app

# Install the pinned PyTorch release from the CPU-only wheel index
# (same accuracy as the GPU build, slower at runtime).
RUN pip install --user --no-cache-dir torch==2.4.1 \
        --index-url https://download.pytorch.org/whl/cpu

# Install the rest of the Python dependencies. Every requirements line that
# begins with "torch" is filtered out first so the CPU wheel installed above
# is not replaced by a pinned entry. Note that the filter is a prefix match,
# so it also removes any other package whose name starts with "torch".
COPY --chown=user:user requirements.txt ./
RUN grep -v '^torch' requirements.txt > requirements.nogpu.txt \
 && pip install --user --no-cache-dir --requirement requirements.nogpu.txt

# Pre-download models at build time — accuracy-first choices, no runtime download
# Whisper checkpoint named by WHISPER_MODEL (default large-v3, 2.9 GB — chosen
# as the highest-accuracy option for Indian children's L2 English).
# Reading the name from the environment keeps the baked-in checkpoint in step
# with the value the container exposes at runtime.
RUN python -c "import os, whisper; whisper.load_model(os.environ['WHISPER_MODEL'])"
# spaCy pipeline named by SPACY_MODEL (default en_core_web_trf, 440 MB —
# the transformer pipeline, chosen for accuracy over en_core_web_lg).
# Using the variable keeps the downloaded pipeline in step with the value the
# container exposes at runtime.
RUN python -m spacy download "${SPACY_MODEL}"
# NLTK corpora (small), stored in the directory named by NLTK_DATA.
# raise_on_error=True makes a failed download abort the build; by default
# nltk.download only reports the failure and returns False, which would let
# an image without the corpora build "successfully".
RUN python -c "import os, nltk; [nltk.download(pkg, download_dir=os.environ['NLTK_DATA'], raise_on_error=True) for pkg in ('punkt', 'punkt_tab', 'averaged_perceptron_tagger', 'averaged_perceptron_tagger_eng')]"

# Application source, owned by the runtime user.
# Placed after the dependency and model layers so source edits do not
# invalidate them.
COPY --chown=user:user . /home/user/app/

# Directory for uploaded files; it must be writable by the runtime user
# (storage is ephemeral on the free tier).
RUN mkdir -p uploads

# Port the service listens on (the HF Spaces default).
EXPOSE 7860

# Probe the /health endpoint with curl: 120s start period, then every 30s
# with a 10s timeout; three consecutive failures mark the container unhealthy.
HEALTHCHECK --start-period=120s --interval=30s --timeout=10s --retries=3 \
    CMD curl --fail http://localhost:7860/health || exit 1

# Serve main:app with uvicorn on all interfaces (exec form, no shell wrapper).
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]