import librosa
import gradio as gr
from faster_whisper import WhisperModel

class AudioToText:
    """Speech-to-text helper backed by a Faster Whisper model."""

    def __init__(self, model_size="medium.en", device="cpu", compute_type="int8", language="en"):
        """Create the Whisper model and remember the transcription language.

        Args:
            model_size: Model identifier passed to ``WhisperModel``.
            device: Device name passed to ``WhisperModel``.
            compute_type: Compute type passed to ``WhisperModel``.
            language: Language code forwarded to every ``transcribe`` call.
        """
        self.language = language
        self.model = WhisperModel(model_size, device=device, compute_type=compute_type)

    def transcribe_audio(self, file_path):
        """Return the transcription of the audio file at ``file_path``.

        The file is loaded through ``librosa`` at a 16 kHz sample rate and the
        resulting samples are handed to the model. Each segment's text is
        stripped of surrounding whitespace and the segments are joined with
        newlines.

        Args:
            file_path: Location of the audio file to transcribe.

        Returns:
            A single string with one line per transcribed segment.
        """
        # Only the samples are needed; the sample rate is fixed by ``sr`` above.
        waveform, _sample_rate = librosa.load(file_path, sr=16000)
        segment_iter, _info = self.model.transcribe(
            waveform,
            beam_size=5,
            language=self.language,
            vad_filter=True,
        )
        return "\n".join(piece.text.strip() for piece in segment_iter)

# Build one shared transcriber at import time: constructing AudioToText creates
# its WhisperModel, and the Gradio callback below reuses this single instance
# for every request instead of building a new model per call.
transcriber = AudioToText()

# Define the Gradio function
def transcribe_gradio(file_path):
    """Gradio callback: transcribe the audio file supplied by the UI.

    Args:
        file_path: Path to the audio file provided by the Gradio audio
            component, or ``None``/empty when the user submits without
            supplying any audio.

    Returns:
        The transcription text produced by the module-level ``transcriber``.

    Raises:
        gr.Error: If no audio was provided, so the UI shows a readable
            message instead of an opaque failure from the audio loader.
    """
    # Submitting the form with no audio yields no path; fail early with a
    # user-facing message rather than passing it on to the loader.
    if not file_path:
        raise gr.Error("No audio provided. Please upload an audio file first.")
    return transcriber.transcribe_audio(file_path)

# Wire the callback into a simple web UI: one audio input, configured with
# type="filepath" so the value is passed to the callback as a file path, and
# one plain-text output.
demo = gr.Interface(
    title="Speech-to-Text Whisper Demo",
    description="Upload an MP3 file to transcribe it to text using Faster Whisper.",
    fn=transcribe_gradio,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
)

# Start serving the interface.
# NOTE(review): this runs whenever the module is imported, not only when it is
# executed directly; consider an `if __name__ == "__main__":` guard if this
# file is ever imported from elsewhere.
demo.launch()