# syntax=docker/dockerfile:1.7
#
# Multi-stage build:
# 1. ``converter`` – best-effort conversion of an optional Kazakh-fine-tuned
# Whisper model from HuggingFace into CTranslate2 / faster-whisper
# format. ``transformers`` and ``torch`` live ONLY in this stage.
# 2. ``runtime`` – installs only the API / ML deps, copies the converted
# KK weights (if any), generates the seed dataset and trains the
# ensemble classifier.
#
# The primary model is always vanilla ``openai/whisper-small`` (loaded by
# faster-whisper at startup, no conversion needed). The KK model is an
# optional academic fine-tune (``akuzdeuov/whisper-base.kk``, 15.36% WER on
# the 1000-hour Kazakh Speech Corpus 2). If KK conversion fails for any
# reason the runtime simply uses the primary model for Kazakh too.
# ---------------------------------------------------------------------------
# Stage 1 — convert the optional Kazakh fine-tune from HF to CT2
# ---------------------------------------------------------------------------
FROM python:3.11-slim AS converter

# pip: no download cache, no self-update check.
# HF_HOME: Hugging Face cache location for this stage.
ENV HF_HOME=/cache/hf \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_NO_CACHE_DIR=1

# CA bundle for outbound HTTPS; the apt package lists are removed in the same
# layer so they never end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
    && rm -rf /var/lib/apt/lists/*
# ct2-transformers-converter has to load the HuggingFace checkpoint before it
# can convert it, which requires transformers + torch. The CPU-only wheel index
# keeps the torch download comparatively small.
RUN pip install \
        torch==2.4.1 \
        --index-url https://download.pytorch.org/whl/cpu

# Why ctranslate2 is pinned to exactly 4.6.0 (per the original author's notes):
#   - 4.4 and 4.5 lack the executable-stack fix;
#   - 4.7+ passes a 'dtype' kwarg that is incompatible with
#     WhisperForConditionalGeneration.
RUN pip install \
        ctranslate2==4.6.0 \
        huggingface-hub==0.25.2 \
        safetensors==0.4.5 \
        sentencepiece==0.2.0 \
        transformers==4.45.2
# HuggingFace repo id of the optional Kazakh fine-tune. An empty value, "none",
# or a vanilla Whisper size name disables the conversion step below.
ARG WHISPER_KK_FINETUNE=akuzdeuov/whisper-base.kk

COPY ml_training/convert_whisper.py /tmp/convert_whisper.py

# Best-effort conversion of the KK fine-tune into /converted/kk. A failed
# conversion does NOT fail the build: any partial output is deleted and the
# runtime falls back to the primary vanilla model for Kazakh. /converted itself
# is always created so the later COPY --from=converter has a source directory.
RUN <<'EOF'
set -e

mkdir -p /converted

case "$WHISPER_KK_FINETUNE" in
    ""|"none"|"NONE")
        echo ">>> WHISPER_KK_FINETUNE is empty — no KK fine-tune will be built."
        ;;
    tiny|base|small|medium|large|large-v1|large-v2|large-v3)
        echo ">>> WHISPER_KK_FINETUNE='$WHISPER_KK_FINETUNE' is a vanilla size, skipping conversion."
        ;;
    *)
        echo ">>> Converting '$WHISPER_KK_FINETUNE' to CTranslate2 ..."
        # The converter runs as an `if` condition, so a non-zero exit is
        # handled in the else branch instead of aborting under `set -e`.
        if python /tmp/convert_whisper.py \
                --model "$WHISPER_KK_FINETUNE" \
                --output /converted/kk \
                --quantization int8
        then
            echo ">>> Conversion of '$WHISPER_KK_FINETUNE' SUCCEEDED."
        else
            echo ">>> CONVERSION FAILED — KK will use primary vanilla model at runtime."
            rm -rf /converted/kk
        fi
        ;;
esac

# Dump what was produced into the build log for easier debugging.
echo "--- /converted ---"
ls -la /converted/
if [ -d /converted/kk ]; then
    echo "--- /converted/kk ---"
    ls -la /converted/kk/
fi
EOF
# ---------------------------------------------------------------------------
# Stage 2 — runtime
# ---------------------------------------------------------------------------
FROM python:3.11-slim AS runtime

# HF_*      – Hugging Face cache directory (under /app) and hub download timeout.
# OMP_*     – OpenMP thread count.
# PIP_*     – no download cache, no self-update check.
# PYTHON*   – unbuffered output, no .pyc files, UTF-8 mode and UTF-8 stdio.
ENV HF_HOME=/app/.cache/hf \
    HF_HUB_DOWNLOAD_TIMEOUT=120 \
    OMP_NUM_THREADS=4 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_NO_CACHE_DIR=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONIOENCODING=utf-8 \
    PYTHONUNBUFFERED=1 \
    PYTHONUTF8=1

# curl is what the HEALTHCHECK probe invokes. libgomp1 is the GNU OpenMP
# runtime — presumably required by the native inference libraries; confirm
# before removing. The apt lists are dropped in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /app

# Runtime Python dependencies, all pinned. torch / transformers are not
# installed here; they exist only in the converter stage.
RUN pip install --upgrade pip setuptools wheel \
    && pip install \
        # --- HTTP API ---
        fastapi==0.115.0 \
        "uvicorn[standard]==0.30.6" \
        pydantic==2.9.2 \
        pydantic-settings==2.5.2 \
        sqlalchemy==2.0.35 \
        python-multipart==0.0.12 \
        # --- classifier / numerics ---
        scikit-learn==1.5.2 \
        scipy==1.14.1 \
        joblib==1.4.2 \
        numpy==1.26.4 \
        pandas==2.2.3 \
        # --- model download + speech recognition ---
        requests==2.32.3 \
        huggingface-hub==0.25.2 \
        faster-whisper==1.0.3
# Run the service as an unprivileged user instead of root. The account is
# created before any application files are copied so that:
#   - everything under /app can be handed to it via COPY --chown (no recursive
#     chown of populated directories, which would duplicate them in a new layer);
#   - this layer stays cached when only application source changes.
# /app is the user's home so that any "~"-relative cache lands in a writable
# directory. HF_HOME (/app/.cache/hf) is created on demand by the app user.
# NOTE(review): UID/GID 1000 is chosen to match the UID that Hugging Face Docker
# Spaces are documented to run containers as — confirm for your deployment
# target, and adjust bind-mount ownership if you mount host directories.
RUN groupadd --gid 1000 app \
    && useradd --uid 1000 --gid app --home-dir /app --no-create-home \
        --shell /usr/sbin/nologin app \
    && mkdir -p /app/models /app/data \
    && chown -R app:app /app

COPY --chown=app:app app /app/app
COPY --chown=app:app ml_training /app/ml_training

# Pull in whatever the converter stage produced. If the KK conversion was
# skipped or failed, /app/models/whisper-kk will simply have no kk/
# subdirectory and (per the loader's fallback) the primary model is used for
# Kazakh as well.
COPY --chown=app:app --from=converter /converted /app/models/whisper-kk

# Everything from here on — the build-time training below and the container's
# main process — runs as the unprivileged user.
USER app

# Generate the seed dataset, then train the ensemble classifier on it. Both
# run at build time so the image ships with a ready-to-use classifier.
RUN python -m ml_training.generate_dataset \
    && python -m ml_training.train
# Documentation only: the port uvicorn is told to listen on in CMD below.
EXPOSE 8000

# Probe the health endpoint every 30 s; failures during the first 120 s after
# start do not count towards the 3 retries.
HEALTHCHECK --start-period=120s --interval=30s --timeout=10s --retries=3 \
    CMD curl -fs http://localhost:8000/v1/health || exit 1

# Exec form, so uvicorn is started directly rather than through a shell.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "1"]
|