# (HuggingFace Spaces status-banner residue, not Dockerfile content —
#  preserved here as a comment: "Spaces: Sleeping / Sleeping")
# syntax=docker/dockerfile:1.7
#
# Multi-stage build:
#   1. ``converter`` – best-effort conversion of an optional Kazakh-fine-tuned
#      Whisper model from HuggingFace into CTranslate2 / faster-whisper
#      format. ``transformers`` and ``torch`` live ONLY in this stage.
#   2. ``runtime`` – installs only the API / ML deps, copies the converted
#      KK weights (if any), generates the seed dataset and trains the
#      ensemble classifier.
#
# The primary model is always vanilla ``openai/whisper-small`` (loaded by
# faster-whisper at startup, no conversion needed). The KK model is an
# optional academic fine-tune (``akuzdeuov/whisper-base.kk``, 15.36% WER on
# the 1000-hour Kazakh Speech Corpus 2). If KK conversion fails for any
# reason the runtime simply uses the primary model for Kazakh too.
# ---------------------------------------------------------------------------
# Stage 1 — convert the optional Kazakh fine-tune from HF to CT2
# ---------------------------------------------------------------------------
FROM python:3.11-slim AS converter

# pip behaviour + HuggingFace cache location for the conversion step only.
ENV PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    HF_HOME=/cache/hf

# TLS root certificates are required to download weights from HuggingFace.
RUN apt-get update \
    && apt-get install -y --no-install-recommends ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# CPU-only torch first, from its dedicated index — transformers needs it to
# load the HF checkpoint before ct2-transformers-converter can convert it.
# Keeping this stage CPU-only keeps the (throwaway) layer small-ish.
RUN pip install \
    --index-url https://download.pytorch.org/whl/cpu \
    torch==2.4.1

# ctranslate2 is pinned to 4.6.0 — the only 4.x release that both carries
# the executable-stack fix (broken in 4.4 and 4.5) and predates the 'dtype'
# kwarg (4.7+) that is incompatible with WhisperForConditionalGeneration.
RUN pip install \
    transformers==4.45.2 \
    ctranslate2==4.6.0 \
    huggingface-hub==0.25.2 \
    sentencepiece==0.2.0 \
    safetensors==0.4.5
# Which HF repo (or vanilla size name) to treat as the Kazakh fine-tune.
ARG WHISPER_KK_FINETUNE=akuzdeuov/whisper-base.kk

COPY ml_training/convert_whisper.py /tmp/convert_whisper.py

# Best-effort conversion of the KK fine-tune to CTranslate2 format.
# Three outcomes, none of which fail the build:
#   - empty / "none": nothing to build;
#   - a vanilla size name: faster-whisper loads it directly, no conversion;
#   - anything else: attempt conversion; on failure remove the partial
#     output so the runtime falls back to the primary model for Kazakh.
RUN mkdir -p /converted && \
    case "$WHISPER_KK_FINETUNE" in \
      ""|"none"|"NONE") \
        echo ">>> WHISPER_KK_FINETUNE is empty — no KK fine-tune will be built."; \
        ;; \
      tiny|base|small|medium|large|large-v1|large-v2|large-v3) \
        echo ">>> WHISPER_KK_FINETUNE='$WHISPER_KK_FINETUNE' is a vanilla size, skipping conversion."; \
        ;; \
      *) \
        echo ">>> Converting '$WHISPER_KK_FINETUNE' to CTranslate2 ..."; \
        if python /tmp/convert_whisper.py \
              --model "$WHISPER_KK_FINETUNE" \
              --output /converted/kk \
              --quantization int8; then \
          echo ">>> Conversion of '$WHISPER_KK_FINETUNE' SUCCEEDED."; \
        else \
          echo ">>> CONVERSION FAILED — KK will use primary vanilla model at runtime."; \
          rm -rf /converted/kk; \
        fi \
        ;; \
    esac && \
    echo "--- /converted ---" && ls -la /converted/ && \
    if [ -d /converted/kk ]; then echo "--- /converted/kk ---"; ls -la /converted/kk/; fi
# ---------------------------------------------------------------------------
# Stage 2 — runtime
# ---------------------------------------------------------------------------
FROM python:3.11-slim AS runtime

ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUTF8=1 \
    PYTHONIOENCODING=utf-8 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    OMP_NUM_THREADS=4 \
    HF_HOME=/app/.cache/hf \
    HF_HUB_DOWNLOAD_TIMEOUT=120

# libgomp1: OpenMP runtime required by ctranslate2 / faster-whisper.
# curl: used by the HEALTHCHECK below.
RUN apt-get update \
    && apt-get install -y --no-install-recommends libgomp1 ca-certificates curl \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# FIX: "uvicorn[standard]==..." is quoted — unquoted, the bracket expression
# is a shell glob pattern that the RUN shell may expand against files in the
# working directory (and it breaks outright under glob-strict shells).
RUN pip install --upgrade pip setuptools wheel \
    && pip install \
        fastapi==0.115.0 \
        "uvicorn[standard]==0.30.6" \
        pydantic==2.9.2 \
        pydantic-settings==2.5.2 \
        sqlalchemy==2.0.35 \
        python-multipart==0.0.12 \
        scikit-learn==1.5.2 \
        scipy==1.14.1 \
        joblib==1.4.2 \
        numpy==1.26.4 \
        pandas==2.2.3 \
        requests==2.32.3 \
        huggingface-hub==0.25.2 \
        faster-whisper==1.0.3
COPY app /app/app
COPY ml_training /app/ml_training

# Converted Kazakh CT2 weights from stage 1, when conversion succeeded.
# Otherwise /app/models/whisper-kk simply has no kk/ subdirectory and the
# model loader falls back to the primary model for Kazakh.
COPY --from=converter /converted /app/models/whisper-kk

# Build-time ML pipeline: generate the seed dataset and train the ensemble
# classifier so the image ships ready to serve.
RUN mkdir -p /app/models /app/data \
    && python -m ml_training.generate_dataset \
    && python -m ml_training.train

EXPOSE 8000

# Generous start period: model downloads / loading can take a while on
# first boot before /v1/health starts answering.
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD curl -fs http://localhost:8000/v1/health || exit 1

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "1"]