# ─────────────────────────────────────────────────────────────────────────────
# Speech-to-Text API — inference-only image.
#
# Works in two deployment modes:
#
#   Hugging Face Spaces (default)
#     PORT=7860   (HF routes external traffic here automatically)
#     MODEL_PATH  = HF Hub model ID, e.g. "your-username/whisper-arabic"
#     HF_TOKEN    = HF read token for private model repos (set as a Space secret)
#
#   Local / VPS with docker-compose
#     PORT=8000   (nginx sits in front on port 80 → api:8000)
#     MODEL_PATH  = /models/merged_model  (mounted volume)
#
# Set PORT and MODEL_PATH via environment variables — no rebuild needed.
# ─────────────────────────────────────────────────────────────────────────────
FROM python:3.11-slim

# System packages:
#   curl                 — used by the HEALTHCHECK below.
#   libsndfile1, ffmpeg  — native audio libraries/tools; presumably required by
#                          the inference code's audio loading (confirm against
#                          requirements-api.txt).
# The apt lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libsndfile1 \
        ffmpeg \
        curl \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy only the requirements file first so the dependency layers below stay
# cached when application code changes.
COPY requirements-api.txt .

# CPU wheels are much smaller (~200 MB vs ~2.5 GB for CUDA) and work on
# both HF Spaces CPU hardware and local machines without a GPU.
# For GPU deployment, override at build time:
#   docker build --build-arg TORCH_INDEX=https://download.pytorch.org/whl/cu126
ARG TORCH_INDEX=https://download.pytorch.org/whl/cpu

# Install torch/torchaudio from the selected wheel index before the remaining
# requirements. The index URL is quoted so an overridden value cannot be
# word-split by the shell.
RUN pip install --no-cache-dir --timeout=600 \
        "torch>=2.1.0" \
        "torchaudio>=2.1.0" \
        --index-url "${TORCH_INDEX}"

RUN pip install --no-cache-dir --timeout=300 -r requirements-api.txt

# Inference code and API only — no training, no data-prep, no raw data.
COPY src/__init__.py src/__init__.py
COPY src/inference/ src/inference/
COPY api/ api/

# HF Spaces default is 7860.
# docker-compose overrides this to 8000 for local deployment.
# DEVICE is intentionally empty by default; it can be set at runtime.
ENV PORT=7860 \
    MODEL_PATH=openai/whisper-large-v3 \
    DEVICE="" \
    HF_HOME=/app/.cache/huggingface \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# Pre-create the Hugging Face cache directory and hand it to UID 1000.
# Hugging Face Spaces runs the container as user ID 1000 (see the Spaces
# Docker documentation); without this, /app is owned by root and downloading
# a Hub model into HF_HOME fails with a permission error. Root (the default
# user for local docker-compose runs) can still write here, so local
# behavior is unchanged.
RUN mkdir -p "${HF_HOME}" \
    && chown -R 1000:1000 /app/.cache

# Documents the default port only; the actual listen port follows $PORT.
EXPOSE 7860

# Shell form so ${PORT} is expanded at runtime. The long start period leaves
# time for the service to come up before failures count against retries.
HEALTHCHECK --interval=30s --timeout=10s --start-period=180s --retries=5 \
    CMD curl -f "http://localhost:${PORT}/health" || exit 1

# Shell form so $PORT is expanded at runtime.
# `exec` replaces the /bin/sh wrapper with uvicorn so the server becomes PID 1
# and receives SIGTERM directly from `docker stop`, allowing a clean shutdown
# instead of waiting for the kill timeout.
CMD exec uvicorn api.main:app --host 0.0.0.0 --port "${PORT}" --workers 1 --log-level info