"""Gradio app: transcribe an uploaded audio file with Whisper, then summarize it with BART."""

import os
import tempfile

import gradio as gr
import torch
import whisper
from pydub import AudioSegment
from transformers import pipeline

# Use the GPU when available; both Whisper and the summarizer benefit.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load models once at startup rather than per request.
model = whisper.load_model("small", device=DEVICE)
# Keep the summarizer on the same device as Whisper (transformers pipelines
# take a device index: 0 = first CUDA device, -1 = CPU).
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
    device=0 if DEVICE == "cuda" else -1,
)


def process_audio(audio_file):
    """Transcribe *audio_file* and summarize the transcript.

    Parameters
    ----------
    audio_file : str
        Filesystem path to the uploaded audio (any format pydub/ffmpeg reads).

    Returns
    -------
    tuple[str, str]
        ``(transcript, summary)``. Both are empty strings when no audio is
        given; summary is empty when nothing was transcribed.
    """
    if not audio_file:
        # Gradio passes None when the user submits without a file.
        return "", ""

    # Normalize to mono 16 kHz WAV — the sample rate Whisper expects.
    audio = AudioSegment.from_file(audio_file)
    audio = audio.set_channels(1).set_frame_rate(16000)

    # Use a unique temporary file instead of a hard-coded "temp.wav"
    # (which races under concurrent requests) and always clean it up
    # (the original leaked the file on every call).
    fd, temp_wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # pydub reopens the path itself; keep only the name.
    try:
        audio.export(temp_wav_path, format="wav")
        result = model.transcribe(temp_wav_path)
    finally:
        os.remove(temp_wav_path)

    transcript = result["text"].strip()
    if not transcript:
        # Nothing was recognized; BART raises on empty input, so skip it.
        return transcript, ""

    # Deterministic abstractive summary (do_sample=False = greedy/beam).
    summary = summarizer(
        transcript, max_length=100, min_length=25, do_sample=False
    )[0]["summary_text"]
    return transcript, summary


# Gradio UI: one audio input, two text outputs.
interface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Transcript"),
        gr.Textbox(label="Summary"),
    ],
    title="Audio Summarization App",
    description="Upload an audio file to get its transcription and summary.",
)

if __name__ == "__main__":
    interface.launch()