|
|
import whisper |
|
|
import gradio as gr |
|
|
import time |
|
|
|
|
|
# Load the multilingual "base" Whisper checkpoint once at import time; it is
# reused for every request. Larger checkpoints (small/medium/large) trade
# speed for accuracy.
model = whisper.load_model("base")
|
|
|
|
|
def transcribe(audio):
    """Transcribe a recorded audio file to text with Whisper.

    Parameters
    ----------
    audio : str | None
        Filesystem path to the audio clip provided by the Gradio ``Audio``
        component (``type="filepath"``). With ``live=True`` Gradio may call
        this function with ``None`` before any audio has been captured.

    Returns
    -------
    str
        The decoded transcript, or an empty string when no audio is given.
    """
    # Guard: in live mode Gradio fires the callback with None until the user
    # records something; whisper.load_audio(None) would raise.
    if audio is None:
        return ""

    # Load the waveform and pad/trim it to the 30-second window Whisper
    # expects. Use a distinct name so the filepath argument isn't shadowed.
    waveform = whisper.load_audio(audio)
    waveform = whisper.pad_or_trim(waveform)

    # Compute the log-Mel spectrogram on the same device as the model.
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # Detect the spoken language (probs maps language code -> probability).
    _, probs = model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")

    # Decode with default options and return the plain-text transcript.
    options = whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)
    return result.text
|
|
|
|
|
# Build the web UI: a microphone/file audio input wired to the transcriber,
# streaming results live into a textbox, then start the local server.
demo = gr.Interface(
    fn=transcribe,
    inputs=[gr.components.Audio(type="filepath")],
    outputs=["textbox"],
    title="OpenAI-Whisper Audio to Text Web UI",
    live=True,
)
demo.launch()
|
|
|