"""Gradio app: transcribe an uploaded audio file with Whisper and summarize it."""

import os
import tempfile

import gradio as gr
import librosa
import soundfile as sf
import whisper
from transformers import pipeline

# Summarization model (replace with your preferred checkpoint).
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# ASR model; 'base' trades accuracy for speed — change to 'medium' or 'large'.
asr_model = whisper.load_model("base")


def transcribe_and_summarize(audio_file):
    """Transcribe *audio_file* and summarize the transcript.

    Parameters
    ----------
    audio_file : str
        Filesystem path to the uploaded audio (any format librosa decodes).

    Returns
    -------
    tuple[str, str]
        ``(transcription, summary)``. On failure the first element carries
        the error message and the second is empty.
    """
    tmp_path = None
    try:
        # Resample to 16 kHz mono — the rate Whisper expects.
        data, sr = librosa.load(audio_file, sr=16000)

        # Create the temp file, then close the handle BEFORE writing to it by
        # name: writing while the handle is open breaks on Windows.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
            tmp_path = tmp_wav.name
        sf.write(tmp_path, data, sr)

        # Transcribe the normalized WAV.
        transcription = asr_model.transcribe(tmp_path)["text"].strip()
        if not transcription:
            # Silent/empty audio would otherwise crash the summarizer.
            return "", "No speech detected in the audio."

        # Summarize the transcript.
        summary = summarizer(
            transcription, max_length=120, min_length=30, do_sample=False
        )[0]["summary_text"]
        return transcription, summary
    except Exception as e:  # surface the error in the UI instead of crashing
        return f"Error during processing: {str(e)}", ""
    finally:
        # delete=False means we own cleanup; without this the temp WAV
        # leaked on every request.
        if tmp_path and os.path.exists(tmp_path):
            os.remove(tmp_path)


# Gradio UI: one audio input, two text outputs.
demo = gr.Interface(
    fn=transcribe_and_summarize,
    inputs=gr.Audio(type="filepath", label="Upload MP3 File"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Summary"),
    ],
    title="🎧 MP3 to Transcript & Summary",
    description="Upload an MP3 file and get the transcript with a text summary.",
)

if __name__ == "__main__":
    demo.launch()