# speechorwhat / app.py — Whisper speech-to-text Gradio demo
# (Hugging Face Space by arevedudaa, commit 34c0dbd, "Create app.py")
import gradio as gr
import whisper
import torch
# Load the Whisper base model once at module import so every request reuses it.
# Prefer CUDA when available; Whisper inference is much faster on GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = whisper.load_model("base", device=device)
def transcribe(audio):
    """Transcribe an audio file to text with automatic language detection.

    Parameters
    ----------
    audio : str | None
        Path to the audio file, as supplied by ``gr.Audio(type="filepath")``.
        Gradio passes ``None`` when the button is clicked with no audio.

    Returns
    -------
    str
        The transcribed text, or a short notice when no audio was provided.
    """
    # Guard: without this, whisper.load_audio(None) raises a TypeError
    # whenever the user clicks "Transcribe" before supplying audio.
    if audio is None:
        return "No audio provided. Please upload or record audio first."

    # Load the waveform and pad/trim it to Whisper's 30-second decode window.
    # NOTE: audio longer than 30 s is truncated by this single-pass pipeline;
    # use model.transcribe(audio) if full-length transcription is required.
    waveform = whisper.load_audio(audio)
    waveform = whisper.pad_or_trim(waveform)
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # Detect the spoken language from the mel spectrogram.
    _, probs = model.detect_language(mel)
    language = max(probs, key=probs.get)
    print(f"Detected language: {language}")

    # Decode; fp16 is only valid on GPU (CPU fp16 falls back with a warning).
    options = whisper.DecodingOptions(fp16=torch.cuda.is_available())
    result = whisper.decode(model, mel, options)
    return result.text
# Build the Gradio UI: two tabs (file upload / microphone recording) that
# share the single transcribe() callback.
with gr.Blocks() as demo:
    gr.Markdown("## Multilingual Speech-to-Text Transcription")

    # (tab title, audio source, input label) for each tab — both tabs are
    # otherwise identical, so construct them data-driven.
    tab_specs = [
        ("Upload Audio", "upload", "Upload your audio file"),
        ("Record Audio", "microphone", "Record your audio"),
    ]
    for tab_title, audio_source, audio_label in tab_specs:
        with gr.Tab(tab_title):
            audio_input = gr.Audio(source=audio_source, type="filepath", label=audio_label)
            run_button = gr.Button("Transcribe")
            text_output = gr.Textbox(label="Transcription")
        # Wire this tab's button to the shared transcription callback.
        run_button.click(transcribe, inputs=audio_input, outputs=text_output)

if __name__ == "__main__":
    demo.launch()