Commit 369d759
Parent(s): bd1a487
OptimezedWhisperWorking

Changed files:
- Dockerfile (+7 -2)
- app.py (+73 -5)
Dockerfile
CHANGED

@@ -2,7 +2,12 @@ FROM python:3.11-slim
 
 ENV PYTHONUNBUFFERED=1 PIP_NO_CACHE_DIR=1 HOME=/home/user \
     PATH=/home/user/.local/bin:$PATH PORT=7860 \
-    WHISPER_MODEL=openai/whisper-large-v3-turbo
+    WHISPER_MODEL=openai/whisper-large-v3-turbo \
+    OMP_NUM_THREADS=2 MKL_NUM_THREADS=2 OPENBLAS_NUM_THREADS=2 NUMEXPR_NUM_THREADS=2 \
+    TOKENIZERS_PARALLELISM=false \
+    WHISPER_CPU_THREADS=2 WHISPER_CPU_INTEROP_THREADS=1 \
+    WHISPER_CHUNK_LENGTH_S=30 WHISPER_BATCH_SIZE=8 WHISPER_NUM_BEAMS=1 \
+    WHISPER_ENABLE_PROMPT=0 WHISPER_PRELOAD_ON_START=1
 
 RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg \
     && rm -rf /var/lib/apt/lists/* \

@@ -18,4 +23,4 @@ COPY --chown=user app.py ./
 COPY --chown=user extractors/ ./extractors/
 
 EXPOSE 7860
-CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--
+CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--worker-class", "sync", "--workers", "1", "--timeout", "180", "--preload", "app:app"]
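The new ENV block pins every numeric backend (OpenMP, MKL, OpenBLAS, numexpr) plus the app's own WHISPER_CPU_THREADS to two threads, so the single gunicorn worker cannot oversubscribe a small CPU Space. A minimal probe like the following can confirm the caps are visible inside the container; it is a sketch for checking only, not part of the commit:

import os
import torch

for var in ("OMP_NUM_THREADS", "MKL_NUM_THREADS", "OPENBLAS_NUM_THREADS",
            "NUMEXPR_NUM_THREADS", "WHISPER_CPU_THREADS", "WHISPER_CPU_INTEROP_THREADS"):
    print(var, "=", os.getenv(var))  # values baked in by the ENV block above

# After app.py's configure_torch_for_cpu() has run, these should report the capped values.
print("torch intra-op threads:", torch.get_num_threads())
print("torch inter-op threads:", torch.get_num_interop_threads())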
app.py
CHANGED

@@ -31,6 +31,7 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 
 _WHISPER_MODEL: Optional[Any] = None
 _WHISPER_PROCESSOR: Optional[Any] = None
+_TORCH_CPU_CONFIGURED = False
 
 
 app = Flask(__name__)

@@ -140,18 +141,48 @@ TEST_PHRASES = [
 ]
 
 
+def env_flag(name: str, default: bool = False) -> bool:
+    """Parses a boolean flag from environment variables."""
+    raw = os.getenv(name)
+    if raw is None:
+        return default
+    return raw.strip().lower() in {"1", "true", "yes", "on"}
+
+
+def configure_torch_for_cpu() -> None:
+    """Configures torch for CPU inference."""
+    global _TORCH_CPU_CONFIGURED
+    if _TORCH_CPU_CONFIGURED:
+        return
+
+    cpu_count = max(1, os.cpu_count() or 1)
+    num_threads = int(os.getenv("WHISPER_CPU_THREADS", str(cpu_count)))
+    num_threads = max(1, min(num_threads, cpu_count))
+
+    interop_threads = int(os.getenv("WHISPER_CPU_INTEROP_THREADS", "1"))
+    interop_threads = max(1, interop_threads)
+
+    torch.set_num_threads(num_threads)
+    torch.set_num_interop_threads(interop_threads)
+    torch.backends.mkldnn.enabled = True
+    _TORCH_CPU_CONFIGURED = True
+    print(f"[INFO] torch cpu threads configured: intra={num_threads}, interop={interop_threads}")
+
+
 def get_whisper_pipeline() -> Any:
     """Returns the Whisper pipeline (lazy loading)."""
     global _WHISPER_MODEL, _WHISPER_PROCESSOR
+    configure_torch_for_cpu()
 
     if _WHISPER_MODEL is None:
         from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
 
         model_id = os.getenv("WHISPER_MODEL", "openai/whisper-large-v3-turbo")
+        torch_dtype = torch.float32
 
         model = AutoModelForSpeechSeq2Seq.from_pretrained(
             model_id,
-
+            torch_dtype=torch_dtype,
             low_cpu_mem_usage=True,
             use_safetensors=True,
         )

@@ -159,13 +190,18 @@ def get_whisper_pipeline() -> Any:
 
         _WHISPER_PROCESSOR = AutoProcessor.from_pretrained(model_id)
 
+        chunk_length_s = int(os.getenv("WHISPER_CHUNK_LENGTH_S", "30"))
+        batch_size = int(os.getenv("WHISPER_BATCH_SIZE", "8"))
+
         _WHISPER_MODEL = pipeline(
             "automatic-speech-recognition",
             model=model,
             tokenizer=_WHISPER_PROCESSOR.tokenizer,
             feature_extractor=_WHISPER_PROCESSOR.feature_extractor,
-
+            torch_dtype=torch_dtype,
             device="cpu",
+            chunk_length_s=max(0, chunk_length_s),
+            batch_size=max(1, batch_size),
         )
 
     return _WHISPER_MODEL

@@ -306,22 +342,38 @@ def transcribe_audio_text(audio_path: str, suppliers: list[str] | None = None, u
 
     try:
         t0 = time.time()
+        pipeline_t0 = time.time()
         pipe = get_whisper_pipeline()
+        print(f"[TIMINGS] whisper_pipeline_ready: {round(time.time() - pipeline_t0, 3)}s")
 
         generate_kwargs = {
             "language": "russian",
             "task": "transcribe",
+            "num_beams": int(os.getenv("WHISPER_NUM_BEAMS", "1")),
+            "do_sample": False,
+            "condition_on_prev_text": False,
         }
 
-
-
+        use_prompt = env_flag("WHISPER_ENABLE_PROMPT", default=False)
+        prompt = ""
+        if use_prompt:
+            max_items = int(os.getenv("WHISPER_PROMPT_MAX_ITEMS", "12"))
+            prompt = build_whisper_prompt(suppliers or [], users or [], max_items=max_items)
+
+        if use_prompt and prompt and _WHISPER_PROCESSOR is not None:
             try:
                 generate_kwargs["prompt_ids"] = _WHISPER_PROCESSOR.get_prompt_ids(prompt, return_tensors="pt")
                 print(f"[TIMINGS] whisper_prompt_enabled: suppliers={len(suppliers or [])}, users={len(users or [])}")
             except Exception as prompt_error:
                 print(f"[WARN] Whisper prompt disabled: {prompt_error}")
+        elif not use_prompt:
+            print("[TIMINGS] whisper_prompt_disabled")
+
+        infer_t0 = time.time()
+        with torch.inference_mode():
+            result = pipe(audio_path, generate_kwargs=generate_kwargs)
+        print(f"[TIMINGS] whisper_infer_only: {round(time.time() - infer_t0, 3)}s")
 
-        result = pipe(audio_path, generate_kwargs=generate_kwargs)
         text = result.get("text", "").strip()
         elapsed = round(time.time() - t0, 3)
         print(f"[TIMINGS] whisper_transcribe: {elapsed}s")

@@ -401,6 +453,22 @@ def parse_context(raw: str | None) -> dict[str, Any]:
     return {}
 
 
+def preload_whisper_if_enabled() -> None:
+    """Preloads Whisper at process start to remove the cold start from the first request."""
+    if not env_flag("WHISPER_PRELOAD_ON_START", default=True):
+        return
+
+    started = time.time()
+    try:
+        get_whisper_pipeline()
+        print(f"[TIMINGS] whisper_preload: {round(time.time() - started, 3)}s")
+    except Exception as preload_error:
+        print(f"[WARN] Whisper preload failed: {preload_error}")
+
+
+preload_whisper_if_enabled()
+
+
 # ============================================================================
 # ENDPOINTS
 # ============================================================================
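Taken together, the app.py changes amount to: float32 weights, explicit chunked long-form decoding (chunk_length_s / batch_size), single-beam greedy generation, an opt-in prompt, and inference wrapped in torch.inference_mode(). A condensed standalone sketch of the same pipeline configuration follows; the audio path sample.wav and the inline env-var fallbacks are illustrative stand-ins, not taken from the commit:

import os
import torch
from transformers import pipeline

asr = pipeline(
    "automatic-speech-recognition",
    model=os.getenv("WHISPER_MODEL", "openai/whisper-large-v3-turbo"),
    device="cpu",
    torch_dtype=torch.float32,  # same dtype the commit pins
    chunk_length_s=int(os.getenv("WHISPER_CHUNK_LENGTH_S", "30")),  # long-form chunking window
    batch_size=int(os.getenv("WHISPER_BATCH_SIZE", "8")),  # chunks decoded per forward pass
)

with torch.inference_mode():  # skip autograd bookkeeping during decoding
    out = asr("sample.wav", generate_kwargs={
        "language": "russian",
        "task": "transcribe",
        "num_beams": int(os.getenv("WHISPER_NUM_BEAMS", "1")),  # 1 = greedy, the new default
        "do_sample": False,
    })
print(out["text"])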
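One behavioral detail worth noting: preload_whisper_if_enabled() runs at module import, and the new gunicorn CMD adds --preload, so the model is loaded once while the app module is imported in the master process, before the worker starts serving, rather than on the first request. Whether preloading (and the prompt) is enabled is decided by env_flag, whose truthy tokens are exactly {"1", "true", "yes", "on"}. A small self-contained check of that truth table; the helper is copied from the diff above so the snippet runs on its own:

import os

def env_flag(name: str, default: bool = False) -> bool:
    # Copied verbatim from the app.py hunk above.
    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() in {"1", "true", "yes", "on"}

os.environ["WHISPER_PRELOAD_ON_START"] = "0"
print(env_flag("WHISPER_PRELOAD_ON_START", default=True))  # False: "0" is not a truthy token
os.environ["WHISPER_PRELOAD_ON_START"] = "On"
print(env_flag("WHISPER_PRELOAD_ON_START"))                # True: matching is case-insensitive
del os.environ["WHISPER_PRELOAD_ON_START"]
print(env_flag("WHISPER_PRELOAD_ON_START", default=True))  # True: unset falls back to the default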