Spaces:

chhola14bhatoora
/

Audio_Summarization

Sleeping

File size: 2,039 Bytes

#imports 
import os
import gradio as gr
from deepgram import DeepgramClient, PrerecordedOptions
import google.generativeai as genai
# Service credentials come from the environment; a variable that is not set
# yields None.
DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM")
GEMINI_API_KEY = os.environ.get("GEMINI")

# Deepgram client used by transcribe_audio for every request.
deepgram = DeepgramClient(DEEPGRAM_API_KEY)

# Hand the Gemini key to the google.generativeai module.
genai.configure(api_key=GEMINI_API_KEY)

# Function to transcribe audio using Deepgram
def transcribe_audio(audio_path, *, language="hi", model="nova-2"):
    """Transcribe an audio file with Deepgram and return the transcript text.

    Args:
        audio_path: Path of the audio file to send to Deepgram.
        language: Language code passed to Deepgram. Defaults to "hi", the
            value this function previously hard-coded.
        model: Deepgram model name. Defaults to "nova-2", the value this
            function previously hard-coded.

    Returns:
        The transcript of the first alternative of the first channel in the
        Deepgram response.
    """
    options = PrerecordedOptions(
        smart_format=True, model=model, language=language
    )

    # The open handle is passed to the SDK, so the request is made inside the
    # with-block; the response is only inspected after the file is closed.
    with open(audio_path, "rb") as audio_file:
        response = deepgram.listen.prerecorded.v("1").transcribe_file(
            {"buffer": audio_file}, options
        )

    # Extract the transcript from the response
    return response["results"]["channels"][0]["alternatives"][0]["transcript"]

# Function to summarize the transcription using Gemini
def summarize_text(transcript):
    """Ask Gemini for a long Hindi summary of *transcript*.

    Args:
        transcript: Transcription text to embed in the prompt.

    Returns:
        The ``text`` of the response returned by ``generate_content``.
    """
    instructions = (
        "This is the transcription of an audio file. "
        "It can be in Hindi, English, or another language. "
        "Generate a long summary with all the points in it in Hindi:"
    )
    full_prompt = f"{instructions}\n\n{transcript}"

    # A model handle is created per call, exactly as before.
    gemini = genai.GenerativeModel('models/gemini-1.5-flash')
    return gemini.generate_content(full_prompt).text

# Wrapper function to handle both transcription and summarization
def process_audio(audio_path):
    """Transcribe an audio file and return a summary of its content.

    Args:
        audio_path: Filesystem path of the audio file, or a falsy value
            (``None`` / empty string) when no file was provided.

    Returns:
        The summary text produced by ``summarize_text``.

    Raises:
        gr.Error: If no audio file was provided, or if the transcription
            came back empty so there is nothing to summarize.
    """
    # Without this guard a missing upload reaches open() and fails with an
    # opaque TypeError instead of a message the user can act on.
    if not audio_path:
        raise gr.Error("Please upload an audio file first.")

    # Step 1: Transcribe the audio
    transcript = transcribe_audio(audio_path)

    # An empty transcript would send a prompt with no content to summarize.
    if not transcript or not transcript.strip():
        raise gr.Error("No speech could be transcribed from the audio file.")

    # Step 2: Summarize the transcription
    return summarize_text(transcript)


# Widgets for the UI: an upload-only audio input delivered to the handler as
# a file path, and a text box that shows the returned summary.
audio_input = gr.Audio(sources="upload", type="filepath")
summary_output = gr.Textbox(label="Summary")

iface = gr.Interface(
    fn=process_audio,
    inputs=audio_input,
    outputs=summary_output,
    title="Audio Summarization App",
    description="Upload an audio file, and the app will transcribe and summarize it.",
)

# Launch the app; share=True additionally requests a public share link.
iface.launch(share=True)