|
|
import os |
|
|
import tempfile |
|
|
import datetime |
|
|
|
|
|
import gradio as gr |
|
|
import torch |
|
|
import whisperx |
|
|
from whisperx.diarize import DiarizationPipeline |
|
|
|
|
|
|
|
|
# Select the compute device: CUDA when torch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Speech-recognition model: the "small" checkpoint with int8 compute.
asr_model = whisperx.load_model("small", device=device, compute_type="int8")

# Alignment model and its metadata, loaded for Russian ("ru").
align_model, metadata = whisperx.load_align_model(language_code="ru", device=device)

# The token is read from the environment (None when the variable is unset)
# and handed to the diarization pipeline.
hf_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
diarization_pipeline = DiarizationPipeline(use_auth_token=hf_token, device=device)
|
|
|
|
|
def transcribe_with_diarization(audio_path):
    """Transcribe an audio file in Russian and label each segment by speaker.

    The audio is decoded once and the same waveform is handed to the ASR
    model, the alignment step and the diarization pipeline, instead of each
    stage re-reading the file from disk.

    Args:
        audio_path: Filesystem path of the audio file. A falsy value
            (``None`` or ``""``) skips the whole pipeline; presumably this
            is what the UI passes when no file has been provided.

    Returns:
        One ``[speaker] text`` line per segment, joined with newlines.
        Segments without a ``speaker`` key are labelled ``Speaker``.
        An empty string is returned when no audio was supplied.
    """
    # Guard first: without it a missing path fails deep inside the models.
    if not audio_path:
        return ""

    # Decode once; every stage below accepts the loaded waveform.
    audio = whisperx.load_audio(audio_path)

    # 1. Speech recognition with the language fixed to Russian.
    result = asr_model.transcribe(audio, language="ru", batch_size=16)

    # 2. Alignment of the recognised segments against the audio.
    aligned = whisperx.align(
        result["segments"],
        align_model,
        metadata,
        audio,
        device,
    )

    # 3. Speaker diarization, then merge speaker labels into the segments.
    diarization = diarization_pipeline(audio)
    merged = whisperx.assign_word_speakers(diarization, aligned)["segments"]

    return "\n".join(
        f"[{seg.get('speaker', 'Speaker')}] {seg.get('text', '').strip()}"
        for seg in merged
    )
|
|
|
|
|
def export_to_txt(text):
    """Write the transcript to a uniquely named UTF-8 ``.txt`` file.

    The file is created in the system temporary directory. Its name keeps
    the ``transcript_<YYYYmmdd_HHMMSS>`` prefix, followed by a random
    component so that two exports within the same second never overwrite
    each other.

    Args:
        text: Transcript text to save. ``None`` is written as an empty file.

    Returns:
        Absolute path of the file that was written.
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    # delete=False: the file must outlive this function so it can be served
    # for download after we return.
    with tempfile.NamedTemporaryFile(
        mode="w",
        encoding="utf-8",
        prefix=f"transcript_{timestamp}_",
        suffix=".txt",
        delete=False,
    ) as f:
        f.write(text or "")
        path = f.name
    return path
|
|
|
|
|
|
|
|
# Build the Gradio interface: an audio input, a transcription button with its
# result textbox, and an export button that yields a downloadable file.
with gr.Blocks(title="🎙️ DiarAI: Транскрибация и диаризация (RU)") as app:
    gr.Markdown("""
    ## Транскрибация и диаризация (русский язык)
    - Фиксированный язык распознавания: **ru** для повышения скорости.
    - Диаризация спикеров через Pyannote.
    """)

    # Components, created in the order they appear on the page.
    audio_input = gr.Audio(type="filepath", label="Загрузите аудио (только RU)")
    transcribe_btn = gr.Button("▶️ Транскрибировать")
    output_txt = gr.Textbox(label="Результат транскрипции", lines=20)
    save_btn = gr.Button("💾 Экспорт в .txt")
    download_file = gr.File(label="Скачать результат")

    # Event wiring: audio -> transcript text, transcript text -> .txt file.
    transcribe_btn.click(fn=transcribe_with_diarization, inputs=audio_input, outputs=output_txt)
    save_btn.click(fn=export_to_txt, inputs=output_txt, outputs=download_file)


# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    app.launch()