FROM python:3.10-slim

# ── System deps ────────────────────────────────────────────────────────────────
# build-essential — C/C++ toolchain for Python packages that compile native code
# Rust + cargo    — needed to build DeepFilterNet (df package)
# ca-certificates — listed explicitly so the HTTPS rustup download keeps working
#                   now that recommended packages are no longer pulled in
#
# --no-install-recommends stops apt from installing optional extras.
# The rustup installer is saved to a file and then executed instead of being
# piped into sh: with `curl | sh` under /bin/sh (no pipefail) a failed or
# truncated download would not fail the build. `--profile minimal` installs
# only rustc, cargo and the standard library, which is all a build needs.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        ffmpeg \
        git \
    && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh \
    && sh /tmp/rustup-init.sh -y --profile minimal \
    && rm -f /tmp/rustup-init.sh \
    && rm -rf /var/lib/apt/lists/*

# Put cargo/rustc on PATH for subsequent RUN steps
ENV PATH="/root/.cargo/bin:${PATH}"

# All later relative paths (COPY destination, the module path in CMD) resolve
# against this directory.
WORKDIR /app

# ── PyTorch (CPU wheels) ───────────────────────────────────────────────────────
# torch and torchaudio are resolved against PyTorch's CPU wheel index rather
# than the default package index.
RUN pip install --no-cache-dir \
    --index-url https://download.pytorch.org/whl/cpu \
    torch \
    torchaudio

# ── Core app deps ──────────────────────────────────────────────────────────────
# Web server, HTTP/API clients, translation + tokenization, audio processing,
# speech-to-text and media upload libraries. One package per line so that
# additions and removals show up as single-line diffs.
RUN pip install --no-cache-dir \
    fastapi \
    uvicorn \
    requests \
    groq \
    deep-translator \
    transformers \
    tokenizers \
    huggingface_hub \
    sentencepiece \
    sacremoses \
    soundfile \
    noisereduce \
    numpy \
    pyloudnorm \
    librosa \
    ffmpeg-python \
    faster-whisper \
    cloudinary

# ── Denoiser v2 additions ──────────────────────────────────────────────────────
# deepfilternet — noise suppression; buildable here because Rust is installed
# jellyfish     — Jaro-Winkler similarity for phonetic stutter detection
#
# NOTE(review): speechbrain (SepFormer enhancement model) was listed alongside
# these packages but is not installed by this step — confirm whether the app
# still needs it before relying on it at runtime.
RUN pip install --no-cache-dir deepfilternet jellyfish

# ── Runtime user, app source and caches ────────────────────────────────────────
# Create the unprivileged user BEFORE copying the source so that:
#   * this layer stays cached when only application code changes, and
#   * /app and the Hugging Face cache directory can be handed to that user.
# Without the chown, /app is root-owned and the HF_HOME path below is not
# writable by `user`, so cache writes under it fail with a permission error.
RUN useradd -m -u 1000 user \
    && mkdir -p /app/.cache/huggingface \
    && chown -R user:user /app

# Copy the application source, owned by the runtime user.
COPY --chown=user:user . .

# HOME points at the user's home directory; both Hugging Face cache variables
# point at the user-writable cache directory created above.
ENV HOME=/home/user \
    HF_HOME=/app/.cache/huggingface \
    TRANSFORMERS_CACHE=/app/.cache/huggingface

USER user

# Pre-download DeepFilterNet weights at build time so the first request isn't
# slow. This runs as `user` (after the USER switch) with the ENV above in
# effect, so whatever it caches is owned by the account that runs the app.
# The step remains best-effort, but a failure is now reported in the build log
# instead of being silently discarded.
RUN python -c "from df.enhance import init_df; init_df()" \
    || echo "WARNING: DeepFilterNet weight pre-download failed; continuing without pre-downloaded weights" >&2

# Documents the port the server listens on; EXPOSE does not publish the port
# by itself.
EXPOSE 7860

# Exec-form CMD: starts uvicorn directly (no wrapping shell), serving `main:app`
# on all interfaces at port 7860, matching the EXPOSE above.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]