# vaani-cavp-engine / Dockerfile
# Shaankar39's picture
# init: Vaani CAVP engine (CPU, accuracy-first — Whisper large-v3, spaCy trf)
# 7d5f092
# Slim Python 3.11 base image.
FROM python:3.11-slim

# Build-time only: stops apt/debconf from prompting during the image build.
# Declared as ARG (visible to later RUN steps) instead of ENV so it is not
# baked into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment.
#   PYTHONUNBUFFERED / PIP_NO_CACHE_DIR - unbuffered logs, no pip download cache.
#   HF_HOME / XDG_CACHE_HOME / TRANSFORMERS_CACHE / NLTK_DATA - keep every
#     model/corpus cache under /home/user, the home of the unprivileged user.
#   WHISPER_MODEL / WHISPER_DEVICE / SPACY_MODEL - model selection; presumably
#     read by the application at start-up (confirm against the app code).
# NOTE(review): recent transformers releases deprecate TRANSFORMERS_CACHE in
# favour of HF_HOME; it is kept here for compatibility - confirm against the
# transformers version pinned in requirements.txt.
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    HF_HOME=/home/user/.cache/huggingface \
    XDG_CACHE_HOME=/home/user/.cache \
    TRANSFORMERS_CACHE=/home/user/.cache/huggingface/hub \
    NLTK_DATA=/home/user/nltk_data \
    WHISPER_MODEL=large-v3 \
    WHISPER_DEVICE=cpu \
    SPACY_MODEL=en_core_web_trf
# OS-level packages (kept alphabetical for easy diffing):
#   build-essential     - compiler toolchain for Python packages that build from source
#   curl                - used by the HEALTHCHECK probe
#   ffmpeg, libsndfile1 - audio handling
#   git                 - presumably for VCS-based pip requirements (confirm)
# The apt package index is deleted in the same layer so it never lands in the image.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        ffmpeg \
        git \
        libsndfile1 \
 && rm -rf /var/lib/apt/lists/*
# HF Spaces runs the container as UID 1000, so create a matching account.
# The application directory is created and chown'ed here, while still root,
# so later build steps running as `user` can write into it regardless of how
# the builder assigns ownership to directories that WORKDIR creates implicitly.
RUN useradd -m -u 1000 user \
 && mkdir -p /home/user/app \
 && chown user:user /home/user/app

# Everything from here on runs unprivileged.
USER user

# Packages are installed with `pip install --user`; expose their console
# scripts (under ~/.local/bin) on PATH.
ENV PATH="/home/user/.local/bin:${PATH}"

WORKDIR /home/user/app
# PyTorch 2.4.1, taken from the CPU-only wheel index rather than PyPI.
# Per the original author's note: accuracy is identical to the GPU build,
# only inference is slower.
RUN pip install --user --no-cache-dir torch==2.4.1 \
        --index-url https://download.pytorch.org/whl/cpu
# Remaining Python deps. Lines of requirements.txt that start with "torch" are
# filtered out first, so that the torch pin in requirements.txt is not handed
# to pip (the CPU wheel is installed by an earlier step).
# NOTE(review): '^torch' is a prefix match, so it also drops lines such as
# torchaudio / torchvision / torchmetrics if requirements.txt contains any -
# those would then never be installed. Confirm against requirements.txt.
# NOTE(review): grep -v exits 1 when no line survives the filter, which would
# fail this step; only relevant if requirements.txt holds nothing but torch*.
COPY --chown=user:user requirements.txt .
RUN grep -v '^torch' requirements.txt > requirements.nogpu.txt \
&& pip install --user --no-cache-dir -r requirements.nogpu.txt
# Pre-download models at build time — accuracy-first choices, no runtime download.
# Whisper: load the checkpoint named by WHISPER_MODEL (large-v3 as configured
# in ENV, ~2.9 GB) so the model baked into the image cannot drift from the one
# the environment selects. large-v3 was chosen for highest accuracy on Indian
# children's L2 English.
RUN python -c "import os, whisper; whisper.load_model(os.environ['WHISPER_MODEL'])"
# spaCy pipeline named by SPACY_MODEL (en_core_web_trf as configured in ENV,
# ~440 MB) — the transformer pipeline is preferred over en_core_web_lg for
# accuracy. Reading the name from the environment keeps the downloaded
# pipeline in step with the configured one.
RUN python -m spacy download "${SPACY_MODEL}"
# NLTK corpora (small), downloaded into the directory named by NLTK_DATA.
# nltk.download() reports failure through its return value instead of raising,
# so the results are checked and the build is aborted (non-zero exit, failed
# package names on stderr) if any corpus could not be fetched.
RUN python -c "import os, sys, nltk; \
    pkgs = ['punkt', 'punkt_tab', 'averaged_perceptron_tagger', 'averaged_perceptron_tagger_eng']; \
    failed = [p for p in pkgs if not nltk.download(p, download_dir=os.environ['NLTK_DATA'])]; \
    sys.exit('NLTK download failed: ' + ', '.join(failed) if failed else 0)"
# Application source, copied last so code changes do not invalidate the
# dependency and model layers above; files are owned by the unprivileged user.
# NOTE(review): this copies the whole build context - make sure a .dockerignore
# excludes .git, local caches and secrets.
COPY --chown=user:user . ./

# Writable uploads directory inside the app dir (WORKDIR is /home/user/app);
# contents are ephemeral on the free tier.
RUN mkdir -p uploads
# HF Spaces default port (documentation only; EXPOSE does not publish the port)
EXPOSE 7860
# Liveness probe: GET /health on the app port every 30 s, 10 s timeout,
# unhealthy after 3 consecutive failures. The 120 s start period gives the
# service time to come up before failures count (presumably model loading -
# confirm). `curl -f` fails on HTTP error statuses; `|| exit 1` collapses any
# curl failure code to 1.
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
CMD curl -f http://localhost:7860/health || exit 1
# Start uvicorn serving the `app` object from module `main`, listening on all
# interfaces at port 7860. Exec (JSON) form, so no shell wraps the process.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]