# store-dialogs-qa / models_config.py
# Niko-NN's picture
# feat: benchmark pipeline + grid search + new Russian ASR models
# e120525
from __future__ import annotations
# Registry of speaker-diarization models, keyed by a short model id.
# Each entry below carries a display "name" and a "backend" tag; the other
# fields differ per entry ("repo_id" for pyannote, "config" for NeMo MSDD).
DIARIZATION_MODELS: dict[str, dict] = {
    "pyannote-3.1": dict(
        name="pyannote speaker-diarization 3.1",
        repo_id="pyannote/speaker-diarization-3.1",
        backend="pyannote",
    ),
    "nemo-msdd": dict(
        # The parenthesised Russian word in the label means "experimental".
        name="NeMo MSDD (экспериментальный)",
        backend="nemo-msdd",
        config="diar_msdd_telephonic",
    ),
}
# Registry of speech-to-text models, keyed by a short model id and grouped
# by backend. Every entry below has a display "name", a "backend" tag and
# "language" set to "ru"; the remaining fields vary with the backend.
TRANSCRIPTION_MODELS: dict[str, dict] = {
    # --- faster-whisper (CTranslate2) ---
    "whisper-large-v3": dict(
        name="Whisper large-v3",
        repo_id="Systran/faster-whisper-large-v3",
        backend="faster-whisper",
        compute_type="float16",
        language="ru",
    ),
    "whisper-large-v3-turbo": dict(
        name="Whisper large-v3-turbo",
        repo_id="Systran/faster-whisper-large-v3-turbo",
        backend="faster-whisper",
        compute_type="float16",
        language="ru",
    ),
    # --- transformers-whisper (HF native, Russian fine-tuned) ---
    "whisper-podlodka-turbo": dict(
        name="Whisper Podlodka Turbo (RU)",
        repo_id="bond005/whisper-podlodka-turbo",
        backend="transformers-whisper",
        language="ru",
    ),
    "whisper-large-v3-russian": dict(
        name="Whisper large-v3 Russian",
        repo_id="antony66/whisper-large-v3-russian",
        backend="transformers-whisper",
        language="ru",
    ),
    "whisper-russian-ties-podlodka": dict(
        name="Whisper TIES-merge Podlodka (RU)",
        repo_id="Apel-sin/whisper-large-v3-russian-ties-podlodka-v1.2",
        backend="transformers-whisper",
        language="ru",
    ),
    # --- GigaAM ---
    "gigaam-ctc": dict(
        name="GigaAM CTC",
        backend="gigaam",
        model_type="ctc",
        language="ru",
    ),
    "gigaam-rnnt": dict(
        name="GigaAM RNNT",
        backend="gigaam",
        model_type="rnnt",
        language="ru",
    ),
}
def diarization_choices() -> list[tuple[str, str]]:
    """Return ``(display name, model key)`` pairs for the diarization models.

    One pair is produced per entry of ``DIARIZATION_MODELS``, in the
    registry's iteration order. The first element is the entry's ``"name"``
    field and the second is the registry key.
    """
    labels = [spec["name"] for spec in DIARIZATION_MODELS.values()]
    # Iterating the dict itself yields its keys in the same order as .values().
    return list(zip(labels, DIARIZATION_MODELS))
def transcription_choices() -> list[tuple[str, str]]:
    """Return ``(display name, model key)`` pairs for the transcription models.

    One pair is produced per entry of ``TRANSCRIPTION_MODELS``, in the
    registry's iteration order. The first element is the entry's ``"name"``
    field and the second is the registry key.
    """
    labels = [spec["name"] for spec in TRANSCRIPTION_MODELS.values()]
    # Iterating the dict itself yields its keys in the same order as .values().
    return list(zip(labels, TRANSCRIPTION_MODELS))