amasood committed on
Commit
a902cdb
·
verified ·
1 Parent(s): 7097b80

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -12
app.py CHANGED
@@ -1,29 +1,34 @@
1
  import streamlit as st
2
- import whisper # Correct import for OpenAI Whisper
3
- from transformers import LlamaForCausalLM, LlamaTokenizer
4
  from gtts import gTTS
5
  import os
6
 
7
- # Load models
 
 
 
8
  @st.cache_resource
9
  def load_whisper_model():
10
- return whisper.load_model("base") # Use the whisper package
11
 
 
12
  @st.cache_resource
13
  def load_llama_model():
14
- model_name = "TheBloke/Llama-2-7b-chat-hf" # Replace with your desired model
15
- tokenizer = LlamaTokenizer.from_pretrained(model_name)
16
- model = LlamaForCausalLM.from_pretrained(model_name)
17
  return tokenizer, model
18
 
 
19
  whisper_model = load_whisper_model()
20
  llama_tokenizer, llama_model = load_llama_model()
21
 
22
  # Streamlit App
23
  def main():
24
- st.title("Audio Query Application")
25
 
26
- # File Upload
27
  uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3", "m4a"])
28
 
29
  if uploaded_file is not None:
@@ -34,19 +39,19 @@ def main():
34
 
35
  st.audio(input_audio_path, format="audio/wav")
36
 
37
- # Step 1: Transcribe Audio
38
  with st.spinner("Transcribing audio..."):
39
  transcription = whisper_model.transcribe(input_audio_path)["text"]
40
  st.write(f"**Transcription:** {transcription}")
41
 
42
- # Step 2: Generate Response
43
  with st.spinner("Generating response..."):
44
  inputs = llama_tokenizer(transcription, return_tensors="pt")
45
  outputs = llama_model.generate(**inputs, max_length=150)
46
  response_text = llama_tokenizer.decode(outputs[0], skip_special_tokens=True)
47
  st.write(f"**Response:** {response_text}")
48
 
49
- # Step 3: Convert Text to Speech
50
  with st.spinner("Converting response to audio..."):
51
  response_audio_path = "response_audio.mp3"
52
  tts = gTTS(text=response_text, lang="en")
 
1
  import streamlit as st
2
+ import whisper
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
  from gtts import gTTS
5
  import os
6
 
7
+ # Hugging Face Token (if using a private model)
8
+ HF_AUTH_TOKEN = "" # Replace with your token if needed; leave empty for public models
9
+
10
+ # Load Whisper Model
11
  @st.cache_resource
12
  def load_whisper_model():
13
+ return whisper.load_model("base")
14
 
15
+ # Load Llama-2 Model
16
  @st.cache_resource
17
  def load_llama_model():
18
+ model_name = "meta-llama/Llama-2-7b-chat-hf" # Official Llama-2 model from Meta
19
+ tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=HF_AUTH_TOKEN)
20
+ model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=HF_AUTH_TOKEN, torch_dtype="auto")
21
  return tokenizer, model
22
 
23
+ # Initialize models
24
  # Module-level initialisation: both loaders are decorated with
  # st.cache_resource, presumably so the models are reused rather than
  # reloaded each time this script is executed.
  whisper_model = load_whisper_model()
25
  llama_tokenizer, llama_model = load_llama_model()
26
 
27
  # Streamlit App
28
  def main():
29
+ st.title("Audio Query App with Llama-2 and Whisper")
30
 
31
+ # File upload
32
  uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3", "m4a"])
33
 
34
  if uploaded_file is not None:
 
39
 
40
  st.audio(input_audio_path, format="audio/wav")
41
 
42
+ # Step 1: Transcribe audio
43
  with st.spinner("Transcribing audio..."):
44
  transcription = whisper_model.transcribe(input_audio_path)["text"]
45
  st.write(f"**Transcription:** {transcription}")
46
 
47
+ # Step 2: Generate response using Llama-2
48
  with st.spinner("Generating response..."):
49
  inputs = llama_tokenizer(transcription, return_tensors="pt")
50
  outputs = llama_model.generate(**inputs, max_length=150)
51
  response_text = llama_tokenizer.decode(outputs[0], skip_special_tokens=True)
52
  st.write(f"**Response:** {response_text}")
53
 
54
+ # Step 3: Convert text response to audio
55
  with st.spinner("Converting response to audio..."):
56
  response_audio_path = "response_audio.mp3"
57
  tts = gTTS(text=response_text, lang="en")