Speach-To-Text / Dockerfile
MIP-Tech's picture
Fix HF Space startup: CPU wheels, auto-detect device
27216ff
# ─────────────────────────────────────────────────────────────────────────────
# Speech-to-Text API — inference-only image.
#
# Works in two deployment modes:
#
# Hugging Face Spaces (default)
# PORT=7860 (HF routes external traffic here automatically)
# MODEL_PATH = HF Hub model ID, e.g. "your-username/whisper-arabic"
# HF_TOKEN = HF read token for private model repos (set as a Space secret)
#
# Local / VPS with docker-compose
# PORT=8000 (nginx sits in front on port 80 → api:8000)
# MODEL_PATH = /models/merged_model (mounted volume)
#
# Set PORT and MODEL_PATH via environment variables — no rebuild needed.
# ─────────────────────────────────────────────────────────────────────────────
# Slim Debian-based Python 3.11 image keeps the runtime footprint small.
FROM python:3.11-slim

# OS-level packages, listed alphabetically for easy diffing:
#   curl        - used by the HEALTHCHECK probe
#   ffmpeg      - presumably for audio decoding/conversion (confirm against src/inference)
#   libsndfile1 - presumably the native library behind the Python audio I/O stack
# The apt package lists are removed in the same layer so they never reach the image.
RUN apt-get update \
 && apt-get install --yes --no-install-recommends \
      curl \
      ffmpeg \
      libsndfile1 \
 && rm -rf /var/lib/apt/lists/*
# Every relative path from here on resolves against /app.
WORKDIR /app

# Copy only the dependency manifest at this point, so the install layers below
# stay cached when nothing but application code changes.
COPY requirements-api.txt ./

# PyTorch is installed on its own from a selectable wheel index.
# The default is the CPU index: CPU wheels are much smaller (~200 MB vs
# ~2.5 GB for CUDA) and run on both HF Spaces CPU hardware and local machines
# without a GPU. For a GPU deployment, override the index at build time:
#   docker build --build-arg TORCH_INDEX=https://download.pytorch.org/whl/cu126 .
ARG TORCH_INDEX=https://download.pytorch.org/whl/cpu
RUN pip install \
      --no-cache-dir \
      --timeout=600 \
      --index-url "${TORCH_INDEX}" \
      "torch>=2.1.0" \
      "torchaudio>=2.1.0"

# Remaining API dependencies, resolved from pip's default index.
RUN pip install \
      --no-cache-dir \
      --timeout=300 \
      --requirement requirements-api.txt
# Ship only what serving needs: the package marker, the inference module and
# the API. Training code, data preparation and raw data stay out of the image.
COPY src/__init__.py ./src/__init__.py
COPY src/inference/ ./src/inference/
COPY api/ ./api/
# Runtime defaults; each can be overridden with an environment variable at
# container start, no rebuild needed.
#   PORT        7860 is the HF Spaces default; docker-compose overrides it to
#               8000 for local deployment.
#   MODEL_PATH  HF Hub model ID or a local path to the model.
#   DEVICE      left empty by default (presumably lets the app auto-detect the
#               device; confirm against api/).
#   HF_HOME     Hugging Face cache location, kept inside /app.
#   PYTHONUNBUFFERED / PYTHONDONTWRITEBYTECODE
#               unbuffered logs, and no .pyc files written at runtime.
ENV PORT=7860 \
    MODEL_PATH=openai/whisper-large-v3 \
    DEVICE= \
    HF_HOME=/app/.cache/huggingface \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# Run the service as an unprivileged user instead of root.
# UID 1000 matches the user HF Docker Spaces run containers as. /app is
# root-owned, so the cache directory is created here and handed to that user;
# otherwise model downloads into HF_HOME fail with a permission error.
RUN useradd --create-home --uid 1000 appuser \
 && mkdir -p "${HF_HOME}" \
 && chown -R appuser:appuser /app/.cache
USER appuser

# Documentation only: the port actually bound is whatever PORT is at runtime.
EXPOSE 7860
# Liveness probe against the API's /health endpoint.
# Shell form is used so ${PORT} is read from the container environment at run
# time. The start period leaves three minutes for startup (presumably model
# download/loading) before failed probes count against the retry budget.
HEALTHCHECK \
    --start-period=180s \
    --interval=30s \
    --timeout=10s \
    --retries=5 \
    CMD curl --fail http://localhost:${PORT}/health || exit 1
# Start the API server.
# A shell is still needed so ${PORT} is expanded at container start, but it is
# invoked explicitly in exec form and immediately replaced via `exec`. uvicorn
# therefore runs as PID 1 and receives SIGTERM from `docker stop` directly,
# which lets it shut down gracefully instead of being killed after the timeout.
CMD ["/bin/sh", "-c", "exec uvicorn api.main:app --host 0.0.0.0 --port ${PORT} --workers 1 --log-level info"]