|
|
|
|
|
import os |
|
|
import gradio as gr |
|
|
from deepgram import DeepgramClient, PrerecordedOptions |
|
|
import google.generativeai as genai |
|
|
|
|
|
# API keys are read from environment variables; both must be set before launch.
# NOTE(review): no validation here — a missing key only surfaces later as an
# authentication error from the respective client, not at startup.
DEEPGRAM_API_KEY = os.getenv("DEEPGRAM")

GEMINI_API_KEY = os.getenv("GEMINI")

# Module-level Deepgram client, reused by transcribe_audio() below.
deepgram = DeepgramClient(DEEPGRAM_API_KEY)

# Configure the google-generativeai SDK globally for summarize_text() below.
genai.configure(api_key=GEMINI_API_KEY)
|
|
|
|
|
|
|
|
def transcribe_audio(audio_path):
    """Transcribe a local audio file with Deepgram (nova-2 model, Hindi).

    Parameters
    ----------
    audio_path : str
        Path to a local audio file, opened in binary mode.

    Returns
    -------
    str
        Transcript of the first channel's top alternative.
    """
    # Read the whole file into memory first: the Deepgram SDK's file payload
    # expects a bytes buffer (per the SDK docs), not an open file handle, and
    # doing the read up front keeps the handle's lifetime out of the
    # (potentially slow) network call.
    with open(audio_path, 'rb') as audio_file:
        buffer_data = audio_file.read()

    payload = {'buffer': buffer_data}
    options = PrerecordedOptions(
        smart_format=True, model="nova-2", language="hi"
    )
    response = deepgram.listen.prerecorded.v('1').transcribe_file(payload, options)

    # Drill down to the first channel / top alternative, as in the SDK examples.
    transcript = response['results']['channels'][0]['alternatives'][0]['transcript']
    return transcript
|
|
|
|
|
|
|
|
def summarize_text(transcript):
    """Return a long Hindi summary of *transcript* via Gemini 1.5 Flash."""
    # Prompt asks for a long Hindi summary; the transcript follows two newlines.
    prompt = f"This is the transcription of an audio file. It can be in Hindi, English, or another language. Generate a long summary with all the points in it in Hindi:\n\n{transcript}"

    # Single generation call against the flash model; return the plain text.
    llm = genai.GenerativeModel('models/gemini-1.5-flash')
    return llm.generate_content(prompt).text
|
|
|
|
|
|
|
|
def process_audio(audio_path):
    """Gradio entry point: transcribe the uploaded file, then summarize it.

    Two-stage pipeline — speech-to-text via transcribe_audio(), then
    summarization via summarize_text(); returns the summary string.
    """
    return summarize_text(transcribe_audio(audio_path))
|
|
|
|
|
|
|
|
# Gradio UI: a single audio-upload input wired to process_audio(), with a
# text box displaying the resulting summary.
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs= gr.Textbox(label="Summary"),
    title="Audio Summarization App",
    description="Upload an audio file, and the app will transcribe and summarize it."
)

# share=True publishes a temporary public gradio.live URL — anyone with the
# link can use the app (and consume the configured API quotas).
iface.launch(share=True)