# Source: Hugging Face Space by chhola14bhatoora — "Update app.py" (commit 31f7eff, verified)
#imports
import os
import gradio as gr
from deepgram import DeepgramClient, PrerecordedOptions
import google.generativeai as genai
# Initialize Deepgram Client
# API keys are read from environment variables (e.g. Hugging Face Space secrets).
# NOTE(review): no check for missing keys — if either env var is unset these are
# None and the first API call will fail at request time, not here.
DEEPGRAM_API_KEY = os.getenv("DEEPGRAM")
GEMINI_API_KEY = os.getenv("GEMINI")
deepgram = DeepgramClient(DEEPGRAM_API_KEY)
# Configure the Gemini (Google Generative AI) API
genai.configure(api_key=GEMINI_API_KEY)
# Function to transcribe audio using Deepgram
def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* with Deepgram.

    Uses the nova-2 model with smart formatting, language set to Hindi
    ("hi"). Returns the transcript of the first alternative of the first
    channel as a plain string.
    """
    options = PrerecordedOptions(
        smart_format=True, model="nova-2", language="hi"
    )
    # The file handle must stay open while transcribe_file reads the buffer.
    with open(audio_path, 'rb') as audio_file:
        response = deepgram.listen.prerecorded.v('1').transcribe_file(
            {'buffer': audio_file}, options
        )
    # Pull the transcript text out of the nested response structure.
    return response['results']['channels'][0]['alternatives'][0]['transcript']
# Function to summarize the transcription using Gemini
def summarize_text(transcript):
    """Summarize *transcript* in Hindi via the Gemini 1.5 Flash model.

    The prompt asks for a long, point-by-point summary in Hindi regardless
    of the transcript's original language. Returns the generated text.
    """
    prompt = f"This is the transcription of an audio file. It can be in Hindi, English, or another language. Generate a long summary with all the points in it in Hindi:\n\n{transcript}"
    gemini = genai.GenerativeModel('models/gemini-1.5-flash')
    result = gemini.generate_content(prompt)
    return result.text
# Wrapper function to handle both transcription and summarization
def process_audio(audio_path):
    """Run the full pipeline: transcribe the audio, then summarize it.

    Returns the Hindi summary string produced by summarize_text.
    """
    # Transcription first, then summarization of the resulting text.
    return summarize_text(transcribe_audio(audio_path))
# Build the Gradio UI: one audio upload in, one summary textbox out.
audio_input = gr.Audio(sources="upload", type="filepath")
summary_output = gr.Textbox(label="Summary")
iface = gr.Interface(
    fn=process_audio,
    inputs=audio_input,
    outputs=summary_output,
    title="Audio Summarization App",
    description="Upload an audio file, and the app will transcribe and summarize it."
)
# Launch the app
iface.launch(share=True)