import os
import tempfile
import time

import gradio as gr
import librosa
import numpy as np
import soundfile as sf
import torch
from transformers import pipeline
# Build the speech-recognition pipeline once at import time so every request
# reuses the already-loaded model.
#
# Half precision is only requested when a CUDA device is present: on the CPU
# fallback, float16 inference is poorly supported (slow, and some operators
# may be unavailable depending on the PyTorch build), so float32 is used there.
_USE_CUDA = torch.cuda.is_available()

pipe = pipeline(
    "automatic-speech-recognition",
    model="antony66/whisper-large-v3-russian",
    torch_dtype=torch.float16 if _USE_CUDA else torch.float32,
    device=0 if _USE_CUDA else -1,  # first GPU if available, otherwise CPU
)
def transcribe(audio_data):
    """Transcribe an audio clip with the module-level ``pipe`` ASR pipeline.

    Args:
        audio_data: One of

            * ``None`` - no audio was supplied;
            * ``str`` - path to an audio file; it is loaded with librosa and
              resampled to 16 kHz;
            * ``tuple`` - ``(sample_rate, samples)`` as delivered by Gradio's
              ``type="numpy"`` audio component. The reversed
              ``(samples, sample_rate)`` order is accepted as well.

    Returns:
        A ``(text, log)`` tuple. ``text`` is the recognized text, or an error
        message when the input is missing or of an unsupported type. ``log``
        is a newline-joined list of progress/timing messages.
    """
    log_messages = ["Загрузка файла..."]
    start_time = time.time()

    if audio_data is None:
        return "Ошибка: не получены аудиоданные", "\n".join(log_messages)

    # Validate and decode the input before touching the filesystem, so that
    # rejected inputs never leave a temporary file behind.
    if isinstance(audio_data, tuple):
        first, second = audio_data
        # Gradio passes (sample_rate, samples); tell the two orders apart by
        # checking which element is the scalar sample rate.
        if np.isscalar(first):
            sample_rate, audio_array = first, second
        else:
            audio_array, sample_rate = first, second
    elif isinstance(audio_data, str):
        audio_array, sample_rate = librosa.load(audio_data, sr=16000)
    else:
        return "Ошибка: неизвестный формат аудиоданных", "\n".join(log_messages)

    # A unique temporary file per call: a fixed file name would be overwritten
    # by concurrent requests, mixing up the audio of different users.
    fd, wav_file = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # soundfile reopens the file by path
    try:
        sf.write(wav_file, audio_array, int(sample_rate))
        log_messages.append(
            f"Загрузка файла завершена за {time.time() - start_time:.2f} сек"
        )

        start_time = time.time()
        log_messages.append("Работа модели... в процессе")
        result = pipe(wav_file)
        log_messages.append(
            f"Работа модели завершена за {time.time() - start_time:.2f} сек"
        )
    finally:
        # Always clean up, even if writing or inference raised.
        os.remove(wav_file)

    return result["text"], "\n".join(log_messages)
# Assemble the web UI: one audio input, two text outputs (the transcript and
# the execution log) and a button that runs `transcribe` on the audio.
with gr.Blocks() as app:
    gr.Markdown(value="## Распознавание речи с Whisper")

    # The audio component hands `transcribe` a path to the audio file.
    audio_data = gr.Audio(type="filepath")

    text_output = gr.Textbox(label="Распознанный текст")
    log_output = gr.Textbox(label="Лог выполнения", interactive=False)

    btn = gr.Button(value="Распознать")
    btn.click(
        fn=transcribe,
        inputs=audio_data,
        outputs=[text_output, log_output],
    )

# Start the Gradio server with debug mode enabled.
app.launch(debug=True)