File size: 5,047 Bytes
a783939
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# syntax=docker/dockerfile:1.7
#
# Multi-stage build:
#   1. ``converter``  – best-effort conversion of an optional Kazakh-fine-tuned
#      Whisper model from HuggingFace into CTranslate2 / faster-whisper
#      format. ``transformers`` and ``torch`` live ONLY in this stage.
#   2. ``runtime``    – installs only the API / ML deps, copies the converted
#      KK weights (if any), generates the seed dataset and trains the
#      ensemble classifier.
#
# The primary model is always vanilla ``openai/whisper-small`` (loaded by
# faster-whisper at startup, no conversion needed). The KK model is an
# optional academic fine-tune (``akuzdeuov/whisper-base.kk``, 15.36% WER on
# the 1000-hour Kazakh Speech Corpus 2). If KK conversion fails for any
# reason the runtime simply uses the primary model for Kazakh too.

# ---------------------------------------------------------------------------
# Stage 1 — convert the optional Kazakh fine-tune from HF to CT2
# ---------------------------------------------------------------------------
FROM python:3.11-slim AS converter

# Build-stage environment: pip runs without its download cache and without
# the "new version available" check; HF_HOME is pointed at /cache/hf.
ENV HF_HOME=/cache/hf \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_NO_CACHE_DIR=1

# Install the CA bundle and drop the apt package lists in the same layer so
# they never persist in the image.
RUN apt-get update && \
    apt-get install --yes --no-install-recommends \
        ca-certificates && \
    rm -rf /var/lib/apt/lists/*

# ct2-transformers-converter has to load the HuggingFace checkpoint through
# transformers + torch before it can emit CTranslate2 weights. torch is
# pulled from the CPU-only wheel index to keep this stage as small as it
# reasonably can be.
RUN pip install --index-url=https://download.pytorch.org/whl/cpu torch==2.4.1

# Conversion toolchain, fully pinned.
# ctranslate2 is held at exactly 4.6.0 because, within the 4.x line, it is
# the only release that both
#   - carries the executable-stack fix (4.4 and 4.5 are broken), and
#   - does NOT yet pass the 'dtype' kwarg that is incompatible with
#     WhisperForConditionalGeneration (that starts with 4.7+).
RUN pip install \
        ctranslate2==4.6.0 \
        huggingface-hub==0.25.2 \
        safetensors==0.4.5 \
        sentencepiece==0.2.0 \
        transformers==4.45.2

# Build argument selecting the optional Kazakh fine-tune to convert.
#   - empty / "none" / "NONE"   -> no KK model is built
#   - a vanilla Whisper size     -> nothing to convert, skipped
#   - anything else              -> passed to convert_whisper.py as --model
ARG WHISPER_KK_FINETUNE=akuzdeuov/whisper-base.kk

COPY ml_training/convert_whisper.py /tmp/convert_whisper.py

# Convert the KK fine-tune into /converted/kk (int8). Failures are tolerated:
# the runtime falls back to the primary vanilla model for Kazakh in that case.
#
# A conversion only counts as successful when the script exits 0 AND a
# non-empty /converted/kk/model.bin exists. Without the second check, a
# converter run that exits cleanly but writes no weights would leave a
# /converted/kk directory behind, and the runtime would treat it as a usable
# model instead of falling back. On any failure the partial output is removed
# so the runtime stage never receives a half-written model directory.
#
# /converted itself is always created so the later COPY --from=converter has a
# source directory even when no model was built. The trailing listing is
# build-log diagnostics only.
RUN mkdir -p /converted && \
    case "$WHISPER_KK_FINETUNE" in \
        ""|"none"|"NONE") \
            echo ">>> WHISPER_KK_FINETUNE is empty — no KK fine-tune will be built."; \
            ;; \
        tiny|base|small|medium|large|large-v1|large-v2|large-v3) \
            echo ">>> WHISPER_KK_FINETUNE='$WHISPER_KK_FINETUNE' is a vanilla size, skipping conversion."; \
            ;; \
        *) \
            echo ">>> Converting '$WHISPER_KK_FINETUNE' to CTranslate2 ..."; \
            if python /tmp/convert_whisper.py \
                    --model "$WHISPER_KK_FINETUNE" \
                    --output /converted/kk \
                    --quantization int8 \
                && [ -s /converted/kk/model.bin ]; then \
                echo ">>> Conversion of '$WHISPER_KK_FINETUNE' SUCCEEDED."; \
            else \
                echo ">>> CONVERSION FAILED — KK will use primary vanilla model at runtime."; \
                rm -rf /converted/kk; \
            fi \
            ;; \
    esac && \
    echo "--- /converted ---" && ls -la /converted/ && \
    if [ -d /converted/kk ]; then echo "--- /converted/kk ---"; ls -la /converted/kk/; fi

# ---------------------------------------------------------------------------
# Stage 2 — runtime
# ---------------------------------------------------------------------------
FROM python:3.11-slim AS runtime

# Runtime environment, grouped by concern:
#   Python  - unbuffered output, no .pyc files, UTF-8 mode and UTF-8 stdio
#   pip     - no download cache, no version-check notice
#   compute - OMP_NUM_THREADS fixed at 4
#   HF hub  - HF_HOME under /app/.cache/hf, download timeout set to 120
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONIOENCODING=utf-8 \
    PYTHONUTF8=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_NO_CACHE_DIR=1 \
    OMP_NUM_THREADS=4 \
    HF_HOME=/app/.cache/hf \
    HF_HUB_DOWNLOAD_TIMEOUT=120

# OS packages: the GNU OpenMP runtime, the CA bundle, and curl (used by the
# HEALTHCHECK further down). apt lists are removed in the same layer.
RUN apt-get update && \
    apt-get install --yes --no-install-recommends \
        ca-certificates \
        curl \
        libgomp1 && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# API / ML runtime dependencies, all pinned.
#
# ctranslate2 is pinned explicitly even though faster-whisper already pulls it
# in: left to the resolver, the runtime could end up on a different 4.x than
# the converter stage used to write the KK weights, including one of the
# releases (4.4 / 4.5) that the converter stage documents as broken. 4.6.0
# matches the converter's pin.
#
# "uvicorn[standard]" is quoted because an unquoted [...] is a shell glob
# pattern and would be expanded if a matching file name ever existed in the
# working directory.
RUN pip install --upgrade pip setuptools wheel \
    && pip install \
        fastapi==0.115.0 \
        "uvicorn[standard]==0.30.6" \
        pydantic==2.9.2 \
        pydantic-settings==2.5.2 \
        sqlalchemy==2.0.35 \
        python-multipart==0.0.12 \
        scikit-learn==1.5.2 \
        scipy==1.14.1 \
        joblib==1.4.2 \
        numpy==1.26.4 \
        pandas==2.2.3 \
        requests==2.32.3 \
        huggingface-hub==0.25.2 \
        ctranslate2==4.6.0 \
        faster-whisper==1.0.3

# Dedicated unprivileged account for the service, so the container does not
# run as root. A fixed numeric UID/GID lets orchestrators verify the image is
# non-root. --no-log-init avoids the large sparse lastlog/faillog entries
# that useradd otherwise creates. /app (still empty of application files at
# this point, so the recursive chown is cheap) becomes the account's home and
# is handed over together with the models/ and data/ directories.
RUN groupadd --gid 10001 app \
    && useradd --uid 10001 --gid app --home-dir /app --no-create-home \
        --no-log-init --shell /usr/sbin/nologin app \
    && mkdir -p /app/models /app/data \
    && chown -R app:app /app

# Application code and training scripts, owned by the service account
# (--chown avoids a second, layer-duplicating chown pass).
COPY --chown=app:app app /app/app
COPY --chown=app:app ml_training /app/ml_training

# Pull the converted Kazakh CT2 weights (if conversion succeeded). If it did
# not, /app/models/whisper-kk is copied as an empty directory without the kk/
# subdirectory, and the loader's fallback uses the primary model for Kazakh.
COPY --from=converter --chown=app:app /converted /app/models/whisper-kk

# Everything from here on, including the training step below and the final
# container process, runs as the unprivileged account.
USER 10001:10001

# Generate the seed dataset, then train the ensemble classifier at build time.
# NOTE(review): this assumes both scripts write only beneath /app (or to
# world-writable temp locations); if either writes elsewhere the build fails
# here with a permission error. Confirm against ml_training.
RUN python -m ml_training.generate_dataset \
    && python -m ml_training.train

# The API listens on 8000 (documentation only; publish it with -p / ports:).
EXPOSE 8000

# Liveness probe against the app's /v1/health endpoint. The start period of
# 120s means probe failures during that initial window do not count towards
# the retry limit.
HEALTHCHECK --start-period=120s \
            --interval=30s \
            --timeout=10s \
            --retries=3 \
  CMD curl -fs http://localhost:8000/v1/health || exit 1

# Exec form: uvicorn runs directly, with no wrapping shell. Single worker.
CMD ["uvicorn", "app.main:app", \
     "--host", "0.0.0.0", \
     "--port", "8000", \
     "--workers", "1"]