# CyFutureDemo / app.py
# ronit1710's picture
# Update app.py
# 8df044d verified
import librosa
import gradio as gr
from faster_whisper import WhisperModel
class AudioToText:
    """Transcribe audio files to text with a Faster Whisper model.

    The model is created once in ``__init__`` and reused by every call to
    :meth:`transcribe_audio`.
    """

    # Sample rate (Hz) the audio is resampled to before transcription.
    SAMPLE_RATE = 16000

    def __init__(
        self,
        model_size: str = "medium.en",
        device: str = "cpu",
        compute_type: str = "int8",
        language: str = "en",
    ) -> None:
        """Load the Whisper model used for transcription.

        Args:
            model_size: Model identifier passed to ``WhisperModel``.
            device: Device passed to ``WhisperModel`` (default ``"cpu"``).
            compute_type: Compute/quantization type passed to ``WhisperModel``.
            language: Language code passed to every ``transcribe`` call.
        """
        self.model = WhisperModel(model_size, device=device, compute_type=compute_type)
        self.language = language

    def transcribe_audio(self, file_path, *, beam_size: int = 5, vad_filter: bool = True) -> str:
        """Transcribe an audio file to text.

        The file is decoded and resampled with ``librosa.load``, so any format
        librosa can read is accepted, not only MP3.

        Args:
            file_path: Path to the audio file on disk.
            beam_size: Beam size forwarded to the model's ``transcribe`` call.
            vad_filter: Whether to enable the model's voice-activity filter.

        Returns:
            The transcript, one non-empty segment per line. An empty string
            if no segment contains text.

        Raises:
            ValueError: If ``file_path`` is ``None`` or a blank string.
        """
        # Fail early with a clear message instead of an opaque decoder error
        # from deep inside librosa.
        if file_path is None or (isinstance(file_path, str) and not file_path.strip()):
            raise ValueError("file_path must be a non-empty path to an audio file")

        # The returned sample rate always equals SAMPLE_RATE, so it is discarded.
        audio, _ = librosa.load(file_path, sr=self.SAMPLE_RATE)
        segments, _info = self.model.transcribe(
            audio,
            beam_size=beam_size,
            language=self.language,
            vad_filter=vad_filter,
        )

        # Skip segments that are empty after stripping so the transcript does
        # not contain blank lines.
        stripped = (segment.text.strip() for segment in segments)
        return "\n".join(text for text in stripped if text)
# Build the transcriber once at import time (default settings: "medium.en"
# model, CPU, int8, English) so the Whisper model is created a single time
# and shared by every call made through this module.
transcriber = AudioToText()
# Gradio callback wired into the interface below
def transcribe_gradio(file_path):
    """Transcribe the audio file supplied by the Gradio UI.

    Args:
        file_path: Path to the uploaded/recorded audio file, or ``None`` when
            the user submits the form without providing any audio.

    Returns:
        The transcript text, or an empty string when no audio was provided.
    """
    # Without this guard, an empty submission forwards None to the
    # transcriber and surfaces as an opaque error in the UI.
    if not file_path:
        return ""
    return transcriber.transcribe_audio(file_path)
# Web UI: a single audio input wired to the transcription callback, with the
# result shown as plain text.
demo = gr.Interface(
    title="Speech-to-Text Whisper Demo",
    description="Upload an MP3 file to transcribe it to text using Faster Whisper.",
    fn=transcribe_gradio,
    # type="filepath" makes Gradio hand the callback a path on disk, which is
    # what the transcriber loads.
    inputs=gr.Audio(type="filepath"),
    outputs="text",
)

# Start the Gradio server.
demo.launch()