# NOTE(review): the lines "Spaces: / Paused / Paused" were residue from a
# Hugging Face Spaces page scrape, not part of the program; kept as a comment
# so the file remains valid Python.
import gradio as gr
import whisper
import torch

# Pick the best available device; Whisper runs on CPU but is much faster on CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the multilingual "base" Whisper checkpoint once at module import so the
# Gradio callbacks below reuse a single in-memory model.
model = whisper.load_model("base", device=device)
def transcribe(audio):
    """Transcribe an audio file with Whisper, auto-detecting its language.

    Args:
        audio: Filesystem path to the audio file supplied by the Gradio
            ``Audio`` component, or ``None`` if the user submitted nothing.

    Returns:
        The transcribed text, or a short message when no audio was provided.
    """
    # Gradio passes None when the user clicks the button without
    # uploading/recording anything; fail gracefully instead of crashing.
    if audio is None:
        return "No audio provided. Please upload or record audio first."

    # Load the waveform and fit it to Whisper's fixed 30-second input window.
    waveform = whisper.load_audio(audio)
    waveform = whisper.pad_or_trim(waveform)
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # Detect the spoken language from the mel spectrogram.
    _, probs = model.detect_language(mel)
    language = max(probs, key=probs.get)
    print(f"Detected language: {language}")

    # Decode; fp16 only works on CUDA, so mirror the device availability.
    options = whisper.DecodingOptions(fp16=torch.cuda.is_available())
    result = whisper.decode(model, mel, options)
    return result.text
# Create the Gradio interface: two tabs (upload vs. microphone), each wired
# to the same `transcribe` callback.
with gr.Blocks() as demo:
    gr.Markdown("## Multilingual Speech-to-Text Transcription")

    with gr.Tab("Upload Audio"):
        # type="filepath" makes Gradio hand `transcribe` a path string.
        audio_file = gr.Audio(source="upload", type="filepath", label="Upload your audio file")
        transcribe_button = gr.Button("Transcribe")
        transcription_output = gr.Textbox(label="Transcription")

    with gr.Tab("Record Audio"):
        audio_record = gr.Audio(source="microphone", type="filepath", label="Record your audio")
        record_button = gr.Button("Transcribe")
        record_output = gr.Textbox(label="Transcription")

    # Wire each tab's button to the shared transcription callback.
    transcribe_button.click(transcribe, inputs=audio_file, outputs=transcription_output)
    record_button.click(transcribe, inputs=audio_record, outputs=record_output)
| if __name__ == "__main__": | |
| demo.launch() | |