import os
import subprocess
import sys
import time

# Install Whisper from source at startup. Running pip through the current
# interpreter (`sys.executable -m pip`) guarantees the package lands in the
# environment this script actually runs in, and the argument list avoids
# going through a shell. The install stays best-effort (check=False): if it
# fails but Whisper is already present, the import below still succeeds.
if subprocess.run(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        "git+https://github.com/openai/whisper.git",
    ],
    check=False,
).returncode != 0:
    print(
        "warning: 'pip install' of whisper exited with a non-zero status",
        file=sys.stderr,
    )

# These imports must come after the install step above.
import gradio as gr
import whisper

# Load the "large" checkpoint once at import time so that every call to the
# transcription callback reuses the same model instance.
model = whisper.load_model("large")
|
|
def transcribe(audio):
    """Transcribe a recorded audio file to text with the global Whisper model.

    Args:
        audio: Path to the recorded audio file, as delivered by the Gradio
            audio input (configured with ``type="filepath"``). May be
            ``None`` or empty when the live interface fires without a
            recording.

    Returns:
        The decoded text, or an empty string when there is no audio.
    """
    # A live microphone interface can invoke the callback with no recording
    # (e.g. after the clip is cleared); loading that would raise.
    if not audio:
        return ""

    # Load the waveform and fit it to the fixed input length the model
    # expects. NOTE: per the Whisper docs this window is 30 seconds, so
    # longer recordings are truncated.
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    # Compute the log-Mel spectrogram and move it to the model's device.
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Language detection is only used for logging here.
    _, probs = model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")

    # fp16 is disabled so decoding runs in full precision.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)
    print(result.text)
    return result.text
| |
# Build the web UI and start serving it: a single microphone recording
# (handed to `transcribe` as a file path) in, a single text box out.
# `live=True` re-runs the callback whenever the input changes instead of
# waiting for a submit button.
gr.Interface(
    fn=transcribe,
    inputs=[gr.inputs.Audio(source="microphone", type="filepath")],
    outputs=["textbox"],
    title="Speech to Text with OpenAI (large)",
    live=True,
).launch()