"""Audio summarization app: transcribe with Deepgram, then summarize with Gemini."""

import os

import google.generativeai as genai
import gradio as gr
from deepgram import DeepgramClient, PrerecordedOptions

# API keys come from the environment; fail fast with a clear message instead
# of a confusing downstream authentication error on the first request.
DEEPGRAM_API_KEY = os.getenv("DEEPGRAM")
GEMINI_API_KEY = os.getenv("GEMINI")
if not DEEPGRAM_API_KEY:
    raise RuntimeError("Missing DEEPGRAM environment variable (Deepgram API key).")
if not GEMINI_API_KEY:
    raise RuntimeError("Missing GEMINI environment variable (Gemini API key).")

# Initialize the Deepgram client and configure Gemini once at module load.
deepgram = DeepgramClient(DEEPGRAM_API_KEY)
genai.configure(api_key=GEMINI_API_KEY)


def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* using Deepgram.

    Uses the pre-recorded "nova-2" model with smart formatting and the
    language hint "hi" (Hindi). Returns the transcript string taken from
    the first channel's top alternative.
    """
    # Read the file fully so the request payload holds plain bytes rather
    # than an open file handle whose lifetime is tied to the `with` block.
    with open(audio_path, "rb") as audio_file:
        payload = {"buffer": audio_file.read()}

    options = PrerecordedOptions(
        smart_format=True,
        model="nova-2",
        language="hi",
    )
    response = deepgram.listen.prerecorded.v("1").transcribe_file(payload, options)
    # Extract the transcript text from Deepgram's nested response structure.
    return response["results"]["channels"][0]["alternatives"][0]["transcript"]


def summarize_text(transcript):
    """Return a long Hindi summary of *transcript* via Gemini 1.5 Flash."""
    prompt = (
        "This is the transcription of an audio file. It can be in Hindi, "
        "English, or another language. Generate a long summary with all "
        f"the points in it in Hindi:\n\n{transcript}"
    )
    model = genai.GenerativeModel("models/gemini-1.5-flash")
    response = model.generate_content(prompt)
    return response.text


def process_audio(audio_path):
    """Pipeline entry point: transcribe *audio_path*, then summarize it."""
    transcript = transcribe_audio(audio_path)
    return summarize_text(transcript)


iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs=gr.Textbox(label="Summary"),
    title="Audio Summarization App",
    description="Upload an audio file, and the app will transcribe and summarize it.",
)

# Launch only when run as a script so importing this module stays side-effect
# light (e.g. for testing or embedding the interface elsewhere).
if __name__ == "__main__":
    iface.launch(share=True)