Spaces:

amasood
/

talking_bot

Sleeping

App Files Community

amasood committed on Dec 18, 2024

Commit

a902cdb

verified ·

1 Parent(s): 7097b80

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -12

app.py CHANGED Viewed

@@ -1,29 +1,34 @@
 import streamlit as st
-import whisper  # Correct import for OpenAI Whisper
-from transformers import LlamaForCausalLM, LlamaTokenizer
 from gtts import gTTS
 import os
-# Load models
 @st.cache_resource
 def load_whisper_model():
-    return whisper.load_model("base")  # Use the whisper package
 @st.cache_resource
 def load_llama_model():
-    model_name = "TheBloke/Llama-2-7b-chat-hf"  # Replace with your desired model
-    tokenizer = LlamaTokenizer.from_pretrained(model_name)
-    model = LlamaForCausalLM.from_pretrained(model_name)
     return tokenizer, model
 whisper_model = load_whisper_model()
 llama_tokenizer, llama_model = load_llama_model()
 # Streamlit App
 def main():
-    st.title("Audio Query Application")
-    # File Upload
     uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3", "m4a"])
     if uploaded_file is not None:
@@ -34,19 +39,19 @@ def main():
         st.audio(input_audio_path, format="audio/wav")
-        # Step 1: Transcribe Audio
         with st.spinner("Transcribing audio..."):
             transcription = whisper_model.transcribe(input_audio_path)["text"]
         st.write(f"**Transcription:** {transcription}")
-        # Step 2: Generate Response
         with st.spinner("Generating response..."):
             inputs = llama_tokenizer(transcription, return_tensors="pt")
             outputs = llama_model.generate(**inputs, max_length=150)
             response_text = llama_tokenizer.decode(outputs[0], skip_special_tokens=True)
         st.write(f"**Response:** {response_text}")
-        # Step 3: Convert Text to Speech
         with st.spinner("Converting response to audio..."):
             response_audio_path = "response_audio.mp3"
             tts = gTTS(text=response_text, lang="en")

 import streamlit as st
+import whisper
+from transformers import AutoModelForCausalLM, AutoTokenizer
 from gtts import gTTS
 import os
+# Hugging Face access token (needed for private or gated models, e.g. meta-llama/Llama-2-7b-chat-hf)
+HF_AUTH_TOKEN = ""  # Replace with your token if needed; leave empty for public models
+# Load Whisper Model
 @st.cache_resource
 def load_whisper_model():
+    return whisper.load_model("base")
+# Load Llama-2 Model
 @st.cache_resource
 def load_llama_model():
+    model_name = "meta-llama/Llama-2-7b-chat-hf"  # Official Llama-2 model from Meta
+    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=HF_AUTH_TOKEN)
+    model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=HF_AUTH_TOKEN, torch_dtype="auto")
     return tokenizer, model
+# Initialize models
 whisper_model = load_whisper_model()
 llama_tokenizer, llama_model = load_llama_model()
 # Streamlit App
 def main():
+    st.title("Audio Query App with Llama-2 and Whisper")
+    # File upload
     uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3", "m4a"])
     if uploaded_file is not None:
         st.audio(input_audio_path, format="audio/wav")
+        # Step 1: Transcribe audio
         with st.spinner("Transcribing audio..."):
             transcription = whisper_model.transcribe(input_audio_path)["text"]
         st.write(f"**Transcription:** {transcription}")
+        # Step 2: Generate response using Llama-2
         with st.spinner("Generating response..."):
             inputs = llama_tokenizer(transcription, return_tensors="pt")
             outputs = llama_model.generate(**inputs, max_length=150)
             response_text = llama_tokenizer.decode(outputs[0], skip_special_tokens=True)
         st.write(f"**Response:** {response_text}")
+        # Step 3: Convert text response to audio
         with st.spinner("Converting response to audio..."):
             response_audio_path = "response_audio.mp3"
             tts = gTTS(text=response_text, lang="en")