# app.py — audio transcription + summarization (Whisper + BART) Gradio app
import os
import tempfile

import gradio as gr
import torch
import whisper
from pydub import AudioSegment
from transformers import pipeline
# Set the device: prefer GPU when available, fall back to CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Load the Whisper "small" speech-to-text model onto the chosen device.
# NOTE: this downloads weights on first run and is a module-level side effect.
model = whisper.load_model("small", device=DEVICE)
# Load the summarization pipeline (BART large, fine-tuned on CNN/DailyMail).
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
def process_audio(audio_file):
    """Transcribe an uploaded audio file and summarize the transcript.

    Args:
        audio_file: Path to the uploaded audio file (any format that
            pydub/ffmpeg can decode).

    Returns:
        A ``(transcript, summary)`` tuple of strings.
    """
    # Normalize to mono 16 kHz, the sample format Whisper expects.
    audio = AudioSegment.from_file(audio_file)
    audio = audio.set_channels(1).set_frame_rate(16000)

    # Use a unique temp file rather than a fixed "temp.wav" so concurrent
    # Gradio requests don't clobber each other's audio, and always remove
    # it afterwards (the original leaked the file on every call).
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()
    try:
        audio.export(tmp.name, format="wav")
        # fp16 inference is only supported on CUDA; disabling it on CPU
        # avoids Whisper's per-call fallback warning.
        result = model.transcribe(tmp.name, fp16=(DEVICE == "cuda"))
        transcript = result["text"].strip()
    finally:
        os.remove(tmp.name)

    # Guard against silent/empty audio: the summarizer raises on empty input.
    if not transcript:
        return "", "No speech detected in the audio."

    summary = summarizer(
        transcript, max_length=100, min_length=25, do_sample=False
    )[0]["summary_text"]
    return transcript, summary
# Build the Gradio UI: one audio-file input, two text outputs.
transcript_box = gr.Textbox(label="Transcript")
summary_box = gr.Textbox(label="Summary")

interface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=[transcript_box, summary_box],
    title="Audio Summarization App",
    description="Upload an audio file to get its transcription and summary.",
)

# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    interface.launch()