chhola14bhatoora commited on
Commit
d32532a
·
verified ·
1 Parent(s): 84228cd
Files changed (1) hide show
  1. app.py +100 -0
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import numpy as np # linear algebra
3
+ import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
4
+ from googletrans import Translator
5
+ from huggingface_hub import hf_hub_download
6
+ import os
7
+ from huggingface_hub import login
8
+ import whisper
9
+ from transformers import AutoTokenizer, AutoModelForCausalLM
10
+ import torch
11
+ import gradio as gr
12
+
13
+
14
# --- Speech-recognition and translation clients, loaded once at startup ------

# Whisper ASR model used for transcription + translation to English.
# NOTE(review): 'large' needs several GB of (GPU) memory — confirm the host
# has enough, or downgrade to 'medium'/'small'.
model_audio = whisper.load_model('large')

# Shared googletrans client used for the final English -> Hindi step.
translator = Translator()
18
def translate(audio_path):
    """Transcribe the audio file at *audio_path* into English text.

    Runs Whisper with the "translate" task, which outputs English
    regardless of the spoken language, using beam search (beam_size=5,
    best_of=5) exactly as before.
    """
    decode_opts = {"task": "translate", "beam_size": 5, "best_of": 5}
    transcription = model_audio.transcribe(audio_path, **decode_opts)
    return transcription["text"]
23
+
24
+
25
+
26
# --- Hugging Face authentication and LLM setup -------------------------------

# Read the access token from the environment; never hard-code credentials.
secret_label = "HF_TOKEN"
hf_token = os.getenv(secret_label)
if not hf_token:
    # Fail fast with a clear message instead of an opaque 401 from the Hub.
    raise RuntimeError(f"Environment variable {secret_label} is not set")
login(token=hf_token, add_to_git_credential=True)

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# `use_auth_token` is deprecated in transformers; pass `token=` consistently
# to both the tokenizer and the model (the original mixed the two).
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=hf_token,
    torch_dtype=torch.bfloat16,  # halves memory vs. fp32 on supported GPUs
    device_map="auto",           # spread layers across available devices
)

# Disable SDPA fast paths — presumably working around kernel issues seen with
# this Llama-3 + CUDA combination; confirm still needed before removing.
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)
46
+
47
+
48
+
49
+
50
def llama3(query):
    """Summarize *query* with Meta-Llama-3-8B-Instruct.

    Builds a chat prompt with a summarization system message, samples up to
    1024 new tokens, and returns only the newly generated text (the prompt
    tokens are sliced off before decoding).
    """
    messages = [
        {"role": "system", "content": " You are a helpful assistant and you have to generate a summary of the given prompt in a really good way and length can vary but it should clearly say all important details ."},
        {"role": "user", "content": query},
    ]

    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # Llama-3 ends an assistant turn with <|eot_id|> in addition to the
    # regular EOS token, so stop on either.
    terminators = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]

    outputs = model.generate(
        input_ids,
        max_new_tokens=1024,
        eos_token_id=terminators,
        pad_token_id=tokenizer.eos_token_id,  # Llama-3 has no pad token; avoids a generate() warning
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    # Keep only the generated continuation, not the echoed prompt.
    response = outputs[0][input_ids.shape[-1]:]
    return tokenizer.decode(response, skip_special_tokens=True)
78
+
79
+
80
def eng_to_hindi(summary):
    """Translate the English *summary* into Hindi via googletrans."""
    result = translator.translate(summary, src='en', dest='hi')
    return result.text
83
+
84
def summarize(audio_path):
    """Full pipeline: audio file -> English transcript -> summary -> Hindi."""
    english_text = translate(audio_path)
    english_summary = llama3(english_text)
    return eng_to_hindi(english_summary)
89
+
90
+
91
# Wire the pipeline into a simple Gradio UI.
# FIX: the original used gr.Audio(source="upload", type="file") — `source=`
# was removed in Gradio 4.x, and `type="file"` hands the function a tempfile
# object rather than a path string. `type="filepath"` passes the uploaded
# file's path, which is what whisper's transcribe() expects.
iface = gr.Interface(
    fn=summarize,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Textbox(label="Summary"),
    title="Audio Summarization App",
    description="Upload an audio file, and the app will transcribe and summarize it."
)

# Launch the app
iface.launch()