chhola14bhatoora commited on
Commit
d32532a
·
verified ·
1 Parent(s): 84228cd
Files changed (1) hide show
  1. app.py +100 -0
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import numpy as np # linear algebra
3
+ import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
4
+ from googletrans import Translator
5
+ from huggingface_hub import hf_hub_download
6
+ import os
7
+ from huggingface_hub import login
8
+ import whisper
9
+ from transformers import AutoTokenizer, AutoModelForCausalLM
10
+ import torch
11
+ import gradio as gr
12
+
13
+
14
# --- Speech-recognition and translation clients, loaded once at startup ------

# Whisper ASR model used for transcription + translation to English.
# NOTE(review): 'large' needs several GB of (GPU) memory — confirm the host
# has enough, or downgrade to 'medium'/'small'.
model_audio = whisper.load_model('large')

# Shared googletrans client used for the final English -> Hindi step.
translator = Translator()
18
def translate(audio_path):
    """Transcribe the audio file at *audio_path* into English text.

    Runs Whisper with the "translate" task, which outputs English
    regardless of the spoken language, using beam search (beam_size=5,
    best_of=5) exactly as before.
    """
    decode_opts = {"task": "translate", "beam_size": 5, "best_of": 5}
    transcription = model_audio.transcribe(audio_path, **decode_opts)
    return transcription["text"]
23
+
24
+
25
+
26
# --- Hugging Face authentication and LLM setup -------------------------------

# Read the access token from the environment; never hard-code credentials.
secret_label = "HF_TOKEN"
hf_token = os.getenv(secret_label)
if not hf_token:
    # Fail fast with a clear message instead of an opaque 401 from the Hub.
    raise RuntimeError(f"Environment variable {secret_label} is not set")
login(token=hf_token, add_to_git_credential=True)

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# `use_auth_token` is deprecated in transformers; pass `token=` consistently
# to both the tokenizer and the model (the original mixed the two).
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=hf_token,
    torch_dtype=torch.bfloat16,  # halves memory vs. fp32 on supported GPUs
    device_map="auto",           # spread layers across available devices
)

# Disable SDPA fast paths — presumably working around kernel issues seen with
# this Llama-3 + CUDA combination; confirm still needed before removing.
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)
46
+
47
+
48
+
49
+
50
def llama3(query):
    """Summarize *query* with Meta-Llama-3-8B-Instruct.

    Builds a chat prompt with a summarization system message, samples up to
    1024 new tokens, and returns only the newly generated text (the prompt
    tokens are sliced off before decoding).
    """
    messages = [
        {"role": "system", "content": " You are a helpful assistant and you have to generate a summary of the given prompt in a really good way and length can vary but it should clearly say all important details ."},
        {"role": "user", "content": query},
    ]

    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # Llama-3 ends an assistant turn with <|eot_id|> in addition to the
    # regular EOS token, so stop on either.
    terminators = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]

    outputs = model.generate(
        input_ids,
        max_new_tokens=1024,
        eos_token_id=terminators,
        pad_token_id=tokenizer.eos_token_id,  # Llama-3 has no pad token; avoids a generate() warning
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    # Keep only the generated continuation, not the echoed prompt.
    response = outputs[0][input_ids.shape[-1]:]
    return tokenizer.decode(response, skip_special_tokens=True)
78
+
79
+
80
def eng_to_hindi(summary):
    """Translate the English *summary* into Hindi via googletrans."""
    result = translator.translate(summary, src='en', dest='hi')
    return result.text
83
+
84
def summarize(audio_path):
    """Full pipeline: audio file -> English transcript -> summary -> Hindi."""
    english_text = translate(audio_path)
    english_summary = llama3(english_text)
    return eng_to_hindi(english_summary)
89
+
90
+
91
# Wire the pipeline into a simple Gradio UI.
# FIX: the original used gr.Audio(source="upload", type="file") — `source=`
# was removed in Gradio 4.x, and `type="file"` hands the function a tempfile
# object rather than a path string. `type="filepath"` passes the uploaded
# file's path, which is what whisper's transcribe() expects.
iface = gr.Interface(
    fn=summarize,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Textbox(label="Summary"),
    title="Audio Summarization App",
    description="Upload an audio file, and the app will transcribe and summarize it."
)

# Launch the app
iface.launch()