chhola14bhatoora committed on
Commit
52f7d57
·
verified ·
1 Parent(s): b790f8d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -87
app.py CHANGED
@@ -1,98 +1,53 @@
1
-
2
- import numpy as np # linear algebra
3
- import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
4
- import httpcore
5
- setattr(httpcore, 'SyncHTTPTransport', 'any')
6
-
7
- from googletrans import Translator
8
- from huggingface_hub import hf_hub_download
9
  import os
10
- from huggingface_hub import login
11
- import whisper
12
- from transformers import AutoTokenizer, AutoModelForCausalLM
13
- import torch
14
  import gradio as gr
15
-
16
# Load Whisper's 'large' checkpoint once at import time; used by translate()
# below. NOTE(review): 'large' is multi-GB and slow on CPU — confirm the host
# has a suitable GPU.
model_audio = whisper.load_model('large')
# model.transcribe('/kaggle/input/testing/test1.wav',fp16=False,language="Hindi")['text']

# Shared googletrans client, used by eng_to_hindi() below.
translator = Translator()
20
def translate(audio_path):
    """Transcribe the audio file at *audio_path* and translate it to English.

    Runs the module-level Whisper model (``model_audio``) in "translate"
    mode with beam search and returns the resulting text.
    """
    decode_kwargs = {
        "task": "translate",
        "beam_size": 5,
        "best_of": 5,
    }
    return model_audio.transcribe(audio_path, **decode_kwargs)["text"]
25
-
26
-
27
-
28
# --- Hugging Face authentication and Llama-3 model setup (module-level) ---

# Retrieve the access token from the environment variable (never hard-coded).
secret_label = "HF_TOKEN"
hf_token = os.getenv(secret_label)
login(token=hf_token, add_to_git_credential=True)

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# `use_auth_token=` is deprecated in recent transformers releases; use the
# supported `token=` keyword, matching the model load below.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=hf_token,
    torch_dtype=torch.bfloat16,  # halves memory vs fp32
    device_map="auto",           # place/shard across available devices
)

# Disable fused SDP attention kernels — presumably a workaround for kernel
# issues on this runtime; NOTE(review): confirm this is still required.
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)
48
-
49
-
50
-
51
-
52
def llama3(query):
    """Summarize *query* with the module-level Llama-3 chat model.

    Builds a two-message chat prompt, samples up to 1024 new tokens, and
    returns only the newly generated text (prompt tokens stripped).
    """
    chat = [
        {
            "role": "system",
            "content": " You are a helpful assistant and you have to generate a summary of the given prompt in a really good way and length can vary but it should clearly say all important details .",
        },
        {"role": "user", "content": query},
    ]

    prompt_ids = tokenizer.apply_chat_template(
        chat,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # Stop on either the model's EOS token or Llama-3's end-of-turn marker.
    stop_ids = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]

    generated = model.generate(
        prompt_ids,
        max_new_tokens=1024,
        eos_token_id=stop_ids,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )

    # Keep only the tail past the prompt, then decode it.
    new_tokens = generated[0][prompt_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
80
-
81
-
82
def eng_to_hindi(summary):
    """Translate the English *summary* into Hindi via the shared googletrans client."""
    return translator.translate(summary, src='en', dest='hi').text
85
-
86
def summarize(audio_path):
    """Full pipeline: audio file -> English transcript -> summary -> Hindi text."""
    english_text = translate(audio_path)
    english_summary = llama3(english_text)
    return eng_to_hindi(english_summary)
 
 
 
 
 
 
 
91
 
 
92
 
93
 
94
  iface = gr.Interface(
95
- fn=summarize,
96
  inputs=gr.Audio(sources="upload", type="filepath"),
97
  outputs= gr.Textbox(label="Summary"),
98
  title="Audio Summarization App",
 
1
+ #imports
 
 
 
 
 
 
 
2
  import os
 
 
 
 
3
  import gradio as gr
4
+ from deepgram import DeepgramClient, PrerecordedOptions
5
+ import google.generativeai as genai
6
# --- API client configuration (module-level) ---
#
# SECURITY: the committed revision hard-coded both API keys in plain text
# (and the Gemini key was an unterminated string literal — a SyntaxError).
# Keys that have ever been committed must be revoked/rotated.  Load them
# from the environment instead (on Spaces, set them as repository secrets).
DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY", "")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")

# Initialize Deepgram Client
deepgram = DeepgramClient(DEEPGRAM_API_KEY)

# Configure the Gemini (Google Generative AI) API
genai.configure(api_key=GEMINI_API_KEY)
13
+
14
# Function to transcribe audio using Deepgram
def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* with Deepgram's nova-2 model.

    Sends the file as raw bytes — the v3 SDK's documented ``FileSource``
    shape is ``{"buffer": <bytes>}``; the previous code passed the open file
    object itself.  Returns the first channel's best transcript as a string.
    """
    with open(audio_path, 'rb') as audio_file:
        payload = {'buffer': audio_file.read()}

    options = PrerecordedOptions(
        smart_format=True, model="nova-2", language="hi"
    )
    response = deepgram.listen.prerecorded.v('1').transcribe_file(payload, options)

    # Drill into the response: first channel, top-ranked alternative.
    return response['results']['channels'][0]['alternatives'][0]['transcript']
26
+
27
# Function to summarize the transcription using Gemini
def summarize_text(transcript):
    """Return a long Hindi summary of *transcript*, generated by Gemini 1.5 Flash."""
    request = f"This is the transcription of an audio file. It can be in Hindi, English, or another language. Generate a long summary with all the points in it in Hindi:\n\n{transcript}"

    # Ask Gemini for the summary and hand back its text payload.
    gemini = genai.GenerativeModel('models/gemini-1.5-flash')
    return gemini.generate_content(request).text
37
+
38
# Wrapper function to handle both transcription and summarization
def process_audio(audio_path):
    """Gradio entry point: transcribe *audio_path* with Deepgram, then summarize in Hindi."""
    transcript = transcribe_audio(audio_path)
    # print(transcript)
    return summarize_text(transcript)
47
 
48
 
49
  iface = gr.Interface(
50
+ fn=process_audio,
51
  inputs=gr.Audio(sources="upload", type="filepath"),
52
  outputs= gr.Textbox(label="Summary"),
53
  title="Audio Summarization App",