import gradio as gr
import torchaudio
from speechbrain.pretrained import EncoderClassifier
# Load the pretrained SpeechBrain model once at import time so every request
# reuses it; the downloaded files are cached under the local "tmp" directory.
# NOTE(review): the public model card for "speechbrain/mtl-mimic-voicebank"
# appears to describe a speech-enhancement checkpoint loaded through an
# enhancement interface rather than EncoderClassifier -- confirm that this
# interface/checkpoint pairing is the intended one.
model = EncoderClassifier.from_hparams(
    source="speechbrain/mtl-mimic-voicebank",
    savedir="tmp",
)
# Define the function to transcribe audio
def transcribe(audio) -> str:
    """Run the SpeechBrain model on an audio file and return its text label.

    Args:
        audio: Path of the audio file to process, as handed over by the
            Gradio audio component. ``None`` or an empty string means that
            nothing was uploaded.

    Returns:
        The text label predicted for the recording, or an empty string when
        no audio was supplied.
    """
    # Submitting the form without a file gives us nothing to load; return an
    # empty result instead of letting torchaudio fail on a missing path.
    if not audio:
        return ""

    # torchaudio returns a (channels, time) tensor plus the file's sample rate.
    # NOTE(review): the signal is passed on at its native sample rate --
    # confirm it matches the rate the model was trained on.
    signal, _rate = torchaudio.load(audio)

    # classify_batch reads the first dimension as the batch axis, so a stereo
    # file would be treated as two separate utterances. Downmix to one channel.
    if signal.shape[0] > 1:
        signal = signal.mean(dim=0, keepdim=True)

    # classify_batch yields (probabilities, best score, best index, text
    # labels); only the text label is meaningful for the "text" output.
    _out_prob, _score, _index, text_lab = model.classify_batch(signal)
    return text_lab[0]
# CSS injected into the page; it hides the footer element of the Gradio UI.
custom_css = """
footer {visibility: hidden;}
"""
# Create the Gradio interface
demo = gr.Interface(
    fn=transcribe,  # Function to process input
    # type="filepath" makes Gradio pass the upload as a path string, which is
    # what transcribe() feeds to torchaudio.load; the default ("numpy") would
    # pass a (sample_rate, ndarray) tuple instead.
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs="text",  # Display output as text
    css=custom_css,  # Hide the Gradio footer
)
# Launch the interface only when this file is run as a script, so importing
# the module (e.g. to reuse `demo` or `transcribe`) does not start a server.
if __name__ == "__main__":
    demo.launch()