# syntax=docker/dockerfile:1.7
#
# Multi-stage build:
#   1. ``converter`` – best-effort conversion of an optional Kazakh-fine-tuned
#      Whisper model from HuggingFace into CTranslate2 / faster-whisper
#      format. ``transformers`` and ``torch`` live ONLY in this stage.
#   2. ``runtime`` – installs only the API / ML deps, copies the converted
#      KK weights (if any), generates the seed dataset and trains the
#      ensemble classifier.
#
# The primary model is always vanilla ``openai/whisper-small`` (loaded by
# faster-whisper at startup, no conversion needed). The KK model is an
# optional academic fine-tune (``akuzdeuov/whisper-base.kk``, 15.36% WER on
# the 1000-hour Kazakh Speech Corpus 2). If KK conversion fails for any
# reason the runtime simply uses the primary model for Kazakh too.

# ---------------------------------------------------------------------------
# Stage 1 — convert the optional Kazakh fine-tune from HF to CT2
# ---------------------------------------------------------------------------
FROM python:3.11-slim AS converter

ENV PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    HF_HOME=/cache/hf

RUN apt-get update \
    && apt-get install -y --no-install-recommends ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# transformers + torch are needed by ct2-transformers-converter to load
# the HuggingFace weights before conversion. CPU torch keeps it small-ish.
RUN pip install \
        --index-url https://download.pytorch.org/whl/cpu \
        torch==2.4.1

# ctranslate2 4.6.0 is the only version in 4.x that:
#   - has the executable-stack fix (broken in 4.4 and 4.5)
#   - does NOT pass the 'dtype' kwarg incompatible with
#     WhisperForConditionalGeneration (introduced in 4.7+)
RUN pip install \
        transformers==4.45.2 \
        ctranslate2==4.6.0 \
        huggingface-hub==0.25.2 \
        sentencepiece==0.2.0 \
        safetensors==0.4.5

ARG WHISPER_KK_FINETUNE=akuzdeuov/whisper-base.kk

COPY ml_training/convert_whisper.py /tmp/convert_whisper.py

# Convert the KK fine-tune. Failures are tolerated — the runtime falls
# back to the primary vanilla model for Kazakh in that case.
RUN mkdir -p /converted && \
    case "$WHISPER_KK_FINETUNE" in \
        ""|"none"|"NONE") \
            echo ">>> WHISPER_KK_FINETUNE is empty — no KK fine-tune will be built."; \
            ;; \
        tiny|base|small|medium|large|large-v1|large-v2|large-v3) \
            echo ">>> WHISPER_KK_FINETUNE='$WHISPER_KK_FINETUNE' is a vanilla size, skipping conversion."; \
            ;; \
        *) \
            echo ">>> Converting '$WHISPER_KK_FINETUNE' to CTranslate2 ..."; \
            if python /tmp/convert_whisper.py \
                --model "$WHISPER_KK_FINETUNE" \
                --output /converted/kk \
                --quantization int8; then \
                echo ">>> Conversion of '$WHISPER_KK_FINETUNE' SUCCEEDED."; \
            else \
                echo ">>> CONVERSION FAILED — KK will use primary vanilla model at runtime."; \
                rm -rf /converted/kk; \
            fi \
            ;; \
    esac && \
    echo "--- /converted ---" && ls -la /converted/ && \
    if [ -d /converted/kk ]; then echo "--- /converted/kk ---"; ls -la /converted/kk/; fi

# ---------------------------------------------------------------------------
# Stage 2 — runtime
# ---------------------------------------------------------------------------
FROM python:3.11-slim AS runtime

ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUTF8=1 \
    PYTHONIOENCODING=utf-8 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    OMP_NUM_THREADS=4 \
    HF_HOME=/app/.cache/hf \
    HF_HUB_DOWNLOAD_TIMEOUT=120

# libgomp1 is required by ctranslate2/faster-whisper at runtime;
# curl is used by the HEALTHCHECK below.
RUN apt-get update \
    && apt-get install -y --no-install-recommends libgomp1 ca-certificates curl \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# NOTE: "uvicorn[standard]" is quoted so the shell cannot glob-expand the
# extras brackets against files in the build context.
RUN pip install --upgrade pip setuptools wheel \
    && pip install \
        fastapi==0.115.0 \
        "uvicorn[standard]==0.30.6" \
        pydantic==2.9.2 \
        pydantic-settings==2.5.2 \
        sqlalchemy==2.0.35 \
        python-multipart==0.0.12 \
        scikit-learn==1.5.2 \
        scipy==1.14.1 \
        joblib==1.4.2 \
        numpy==1.26.4 \
        pandas==2.2.3 \
        requests==2.32.3 \
        huggingface-hub==0.25.2 \
        faster-whisper==1.0.3

COPY app /app/app
COPY ml_training /app/ml_training

# Pull the converted Kazakh CT2 weights (if conversion succeeded). If it
# failed, /app/models/whisper-kk will be missing the /kk subdirectory and
# the loader's fallback uses the primary model for Kazakh.
COPY --from=converter /converted /app/models/whisper-kk

# Generate the seed dataset and train the ensemble classifier at build time
# so the image starts with ready-made model artifacts.
RUN mkdir -p /app/models /app/data \
    && python -m ml_training.generate_dataset \
    && python -m ml_training.train

EXPOSE 8000

# Long start-period: first boot downloads/loads the Whisper weights.
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD curl -fs http://localhost:8000/v1/health || exit 1

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "1"]