File size: 2,039 Bytes
52f7d57
d32532a
 
52f7d57
 
 
31f7eff
 
382fa60
52f7d57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d32532a
52f7d57
 
 
d32532a
52f7d57
 
 
 
 
 
 
 
 
 
d32532a
52f7d57
d32532a
88bc6e1
d32532a
52f7d57
88bc6e1
d32532a
 
 
 
 
 
88bc6e1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#imports 
import os
import gradio as gr
from deepgram import DeepgramClient, PrerecordedOptions
import google.generativeai as genai
# Read both API credentials from the environment (None when a variable is unset)
DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM")
GEMINI_API_KEY = os.environ.get("GEMINI")

# Deepgram client used by transcribe_audio()
deepgram = DeepgramClient(DEEPGRAM_API_KEY)

# Hand the Gemini key to the google.generativeai module
genai.configure(api_key=GEMINI_API_KEY)

# Function to transcribe audio using Deepgram
def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* with Deepgram.

    The file is opened in binary mode and sent through the module-level
    ``deepgram`` client's prerecorded endpoint, requesting the ``nova-2``
    model with the language set to ``hi`` and smart formatting enabled.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The ``transcript`` field of the first alternative of the first
        channel in the Deepgram response.
    """
    request_options = PrerecordedOptions(
        smart_format=True, model="nova-2", language="hi"
    )

    with open(audio_path, 'rb') as audio_file:
        # The open file handle itself is the request buffer, so the call
        # must be made while the file is still open.
        result = deepgram.listen.prerecorded.v('1').transcribe_file(
            {'buffer': audio_file}, request_options
        )

        # Walk down to the first alternative of the first channel.
        first_channel = result['results']['channels'][0]
        first_alternative = first_channel['alternatives'][0]
        return first_alternative['transcript']

# Function to summarize the transcription using Gemini
def summarize_text(transcript):
    """Ask Gemini for a long Hindi summary of *transcript*.

    Args:
        transcript: Transcribed text to embed in the prompt.

    Returns:
        The ``text`` attribute of the response generated by the
        ``models/gemini-1.5-flash`` model.
    """
    instructions = "This is the transcription of an audio file. It can be in Hindi, English, or another language. Generate a long summary with all the points in it in Hindi:"
    # Instructions first, then a blank line, then the transcript itself.
    full_prompt = f"{instructions}\n\n{transcript}"

    gemini = genai.GenerativeModel('models/gemini-1.5-flash')
    reply = gemini.generate_content(full_prompt)
    return reply.text

# Wrapper function to handle both transcription and summarization
def process_audio(audio_path):
    """Transcribe an uploaded audio file and return its summary.

    Args:
        audio_path: Filesystem path of the uploaded audio, or ``None`` when
            the interface is submitted without a file.

    Returns:
        The summary text produced by ``summarize_text`` for the transcript
        returned by ``transcribe_audio``.

    Raises:
        gr.Error: If no audio file was provided, or if the transcription
            came back empty. Gradio displays the message to the user.
    """
    # Submitting without an upload yields no path; opening it would fail with
    # an unhelpful TypeError, so report the problem to the user instead.
    if not audio_path:
        raise gr.Error("Please upload an audio file before submitting.")

    # Step 1: transcribe the audio
    transcript = transcribe_audio(audio_path)

    # An empty transcript gives the summarizer nothing to work with, so stop
    # here rather than asking the model to summarize nothing.
    if not transcript or not transcript.strip():
        raise gr.Error("No speech could be transcribed from the uploaded audio.")

    # Step 2: summarize the transcription
    return summarize_text(transcript)


# Input and output components for the interface
_audio_input = gr.Audio(sources="upload", type="filepath")
_summary_output = gr.Textbox(label="Summary")

# Web UI: one uploaded audio file in, one summary text box out
iface = gr.Interface(
    fn=process_audio,
    inputs=_audio_input,
    outputs=_summary_output,
    title="Audio Summarization App",
    description="Upload an audio file, and the app will transcribe and summarize it.",
)

# Launch the app, requesting a public share link
iface.launch(share=True)