# File size: 2,039 Bytes
# imports
import os
import gradio as gr
from deepgram import DeepgramClient, PrerecordedOptions
import google.generativeai as genai
# API credentials are read from the "DEEPGRAM" and "GEMINI" environment
# variables; either value is None when its variable is not set.
DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM")
GEMINI_API_KEY = os.environ.get("GEMINI")

# Module-wide Deepgram client used by the transcription step.
deepgram = DeepgramClient(DEEPGRAM_API_KEY)

# Hand the Gemini key to the google.generativeai module.
genai.configure(api_key=GEMINI_API_KEY)
# Function to transcribe audio using Deepgram
def transcribe_audio(audio_path, *, language="hi", model="nova-2"):
    """Transcribe an audio file with Deepgram and return the transcript.

    Args:
        audio_path: Path of the audio file to read (opened in binary mode).
        language: Value passed as the ``language`` option of the Deepgram
            request. Defaults to ``"hi"``, the value previously hard-coded.
        model: Value passed as the ``model`` option of the Deepgram request.
            Defaults to ``"nova-2"``, the value previously hard-coded.

    Returns:
        The ``transcript`` field of the first alternative of the first
        channel in the Deepgram response.
    """
    options = PrerecordedOptions(smart_format=True, model=model, language=language)

    # The request is issued inside the ``with`` block so the file handle is
    # still open while the client reads from it.
    with open(audio_path, "rb") as audio_file:
        payload = {"buffer": audio_file}
        response = deepgram.listen.prerecorded.v("1").transcribe_file(payload, options)

    # Extract the transcript from the response
    return response["results"]["channels"][0]["alternatives"][0]["transcript"]
# Function to summarize the transcription using Gemini
def summarize_text(transcript):
    """Ask Gemini for a long Hindi summary of ``transcript``.

    Args:
        transcript: Transcription text to embed in the prompt.

    Returns:
        The ``text`` attribute of the response returned by
        ``generate_content``.
    """
    instructions = "This is the transcription of an audio file. It can be in Hindi, English, or another language. Generate a long summary with all the points in it in Hindi:"
    # Instructions, a blank line, then the transcript itself.
    prompt = f"{instructions}\n\n{transcript}"
    return genai.GenerativeModel("models/gemini-1.5-flash").generate_content(prompt).text
# Wrapper function to handle both transcription and summarization
def process_audio(audio_path):
    """Transcribe an audio file and return a summary of the transcript.

    Args:
        audio_path: Path of the audio file to process. May be ``None`` or
            empty when no file was supplied.

    Returns:
        The summary produced by ``summarize_text``, or a short explanatory
        message when there is no audio file or no transcribed text.
    """
    # Without this guard a missing upload reaches open() and fails with an
    # opaque TypeError instead of telling the user what to do.
    if not audio_path:
        return "No audio file was provided. Please upload an audio file."

    # Step 1: Transcribe the audio
    transcript = transcribe_audio(audio_path)

    # An empty transcript gives the summarizer nothing to work from, so skip
    # the model call rather than summarizing an empty prompt.
    if not transcript or not transcript.strip():
        return "No speech could be transcribed from the uploaded audio."

    # Step 2: Summarize the transcription
    return summarize_text(transcript)
# Gradio UI: a single uploaded audio file (delivered to the handler as a
# file path) in, a single "Summary" text box out.
iface = gr.Interface(
    process_audio,
    gr.Audio(sources="upload", type="filepath"),
    gr.Textbox(label="Summary"),
    title="Audio Summarization App",
    description="Upload an audio file, and the app will transcribe and summarize it.",
)

# Launch the app
iface.launch(share=True)