File size: 2,495 Bytes
0db822c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27216ff
 
 
 
 
 
 
 
 
0db822c
 
 
 
 
 
 
 
 
 
27216ff
 
0db822c
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# ─────────────────────────────────────────────────────────────────────────────
# Speech-to-Text API — inference-only image.
#
# Works in two deployment modes:
#
#   Hugging Face Spaces (default)
#     PORT=7860  (HF routes external traffic here automatically)
#     MODEL_PATH = HF Hub model ID, e.g. "your-username/whisper-arabic"
#     HF_TOKEN   = HF read token for private model repos (set as a Space secret)
#
#   Local / VPS with docker-compose
#     PORT=8000  (nginx sits in front on port 80 → api:8000)
#     MODEL_PATH = /models/merged_model  (mounted volume)
#
# Set PORT and MODEL_PATH via environment variables — no rebuild needed.
# ─────────────────────────────────────────────────────────────────────────────

# Slim Python 3.11 base image.
FROM python:3.11-slim

# Native packages, installed and cleaned up in one layer so the apt package
# lists never persist in the image. Kept in alphabetical order for easy diffs:
#   curl        - used by the HEALTHCHECK probe further down
#   ffmpeg      - presumably decodes incoming audio; confirm against api/
#   libsndfile1 - presumably the native audio file I/O backend; confirm
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        ffmpeg \
        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# CPU wheels are much smaller (~200 MB vs ~2.5 GB for CUDA) and work on
# both HF Spaces CPU hardware and local machines without a GPU.
# For GPU deployment, override at build time:
#   docker build --build-arg TORCH_INDEX=https://download.pytorch.org/whl/cu126
ARG TORCH_INDEX=https://download.pytorch.org/whl/cpu

# Install torch BEFORE copying requirements-api.txt: this is the largest and
# slowest layer, and it depends only on TORCH_INDEX. Keeping the COPY below it
# means editing the requirements file no longer invalidates the torch layer.
RUN pip install --no-cache-dir --timeout=600 \
        "torch>=2.1.0" \
        "torchaudio>=2.1.0" \
        --index-url "${TORCH_INDEX}"

# Remaining API dependencies; this layer is rebuilt only when the manifest changes.
COPY requirements-api.txt .
RUN pip install --no-cache-dir --timeout=300 -r requirements-api.txt

# Inference code and API only — no training, no data-prep, no raw data.
# Paths are copied one by one (rather than `COPY . .`) so nothing else from
# the build context can end up in the image. A destination ending in "/" is a
# directory, so each source lands at the same relative path under /app.
COPY src/__init__.py src/
COPY src/inference/ src/inference/
COPY api/ api/

# Runtime defaults. Each value can be overridden when the container starts
# (`docker run -e ...` or a compose `environment:` entry) without a rebuild.
#
#   PYTHONDONTWRITEBYTECODE / PYTHONUNBUFFERED
#                - no .pyc files; stdout/stderr are written unbuffered
#   HF_HOME      - Hugging Face cache directory, kept under /app
#   PORT         - HF Spaces default is 7860; docker-compose overrides this
#                  to 8000 for local deployment
#   MODEL_PATH   - HF Hub model ID, or a path to a mounted model directory
#   DEVICE       - deliberately empty; presumably the app then picks the
#                  device itself — TODO confirm against api/ and src/inference/
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    HF_HOME=/app/.cache/huggingface \
    PORT=7860 \
    DEVICE= \
    MODEL_PATH=openai/whisper-large-v3

# Documents the default listening port. EXPOSE is informational only; the
# port actually bound is whatever PORT is set to at runtime.
EXPOSE 7860

# Probe GET /health on the runtime PORT. Shell form is used so ${PORT} is
# expanded inside the container each time the probe runs. `curl -f` fails on
# HTTP error statuses and `|| exit 1` normalises any failure to exit code 1.
# NOTE(review): the 180s start period presumably covers model download/load
# time — confirm it is long enough for the configured MODEL_PATH.
HEALTHCHECK --start-period=180s --interval=30s --timeout=10s --retries=5 \
    CMD curl -f "http://localhost:${PORT}/health" || exit 1

# Start the API server with a single uvicorn worker.
#
# JSON (exec) form that explicitly invokes a shell:
#   * `sh -c` is still needed so ${PORT} is expanded at container start and
#     can be overridden with `-e PORT=...` without a rebuild.
#   * `exec` replaces the shell with uvicorn, so uvicorn itself is the
#     container's main process and receives SIGTERM from `docker stop`.
#     Without it, graceful shutdown depends on whether /bin/sh happens to
#     forward signals or exec the last command on its own.
CMD ["sh", "-c", "exec uvicorn api.main:app --host 0.0.0.0 --port \"${PORT}\" --workers 1 --log-level info"]