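"""Voice-first chatbot: Deepgram speech I/O around a local GGUF Llama model.

Pipeline: microphone audio -> Deepgram Nova-2 transcription -> llama.cpp
chat completion -> Deepgram Aura speech synthesis, wired up in a Gradio UI.
"""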
import os
import time

import gradio as gr
from deepgram import DeepgramClient, PrerecordedOptions, SpeakOptions
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# --- Configuration ---
DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY") # Ensure this is set in Space Settings
REPO_ID = "Kezovic/iris-q4gguf-v2"
FILENAME = "llama-3.2-1b-instruct.Q4_K_M.gguf"
CONTEXT_WINDOW = 4096
MAX_NEW_TOKENS = 512
TEMPERATURE = 0.7
# --- Initialize Deepgram ---
if not DEEPGRAM_API_KEY:
    print("Error: DEEPGRAM_API_KEY is missing.")
    deepgram = None
else:
    deepgram = DeepgramClient(DEEPGRAM_API_KEY)
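# Note: the listen.rest / speak.rest call style used below assumes a v3+
# Python SDK (deepgram-sdk); older SDK releases expose a different surface.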
# --- Load LLM ---
llm = None
def load_llm():
    global llm
    print("Downloading LLM...")
    try:
        model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
        llm = Llama(
            model_path=model_path,
            n_ctx=CONTEXT_WINDOW,
            n_threads=2,
            verbose=False,
        )
        print("LLM loaded!")
    except Exception as e:
        print(f"Error loading model: {e}")
load_llm()
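# Quick sanity check for local debugging (a sketch; the prompt string and
# max_tokens value here are illustrative, not part of the app):
#   if llm is not None:
#       out = llm.create_chat_completion(
#           messages=[{"role": "user", "content": "Say hello."}], max_tokens=16
#       )
#       print(out["choices"][0]["message"]["content"])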
# --- Helper Functions ---
def transcribe(audio_path):
    """Converts speech to text using Deepgram Nova-2."""
    if not audio_path or deepgram is None:
        return None
    try:
        with open(audio_path, "rb") as buffer:
            payload = {"buffer": buffer}
            options = PrerecordedOptions(smart_format=True, model="nova-2", language="en-US")
            response = deepgram.listen.rest.v("1").transcribe_file(payload, options)
        return response.results.channels[0].alternatives[0].transcript
    except Exception as e:
        print(f"STT Error: {e}")
        return None
def speak(text):
    """Converts text to speech using Deepgram Aura."""
    if not text or deepgram is None:
        return None
    try:
        # linear16 audio in a WAV container, so name the file accordingly.
        filename = f"response_{int(time.time())}.wav"
        options = SpeakOptions(model="aura-asteria-en", encoding="linear16", container="wav")
        deepgram.speak.rest.v("1").save(filename, {"text": text}, options)
        return filename
    except Exception as e:
        print(f"TTS Error: {e}")
        return None
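# Local round-trip check for the two helpers (a sketch; "sample.wav" is a
# placeholder path, and DEEPGRAM_API_KEY must be set for either call to work):
#   text = transcribe("sample.wav")
#   if text:
#       print("Heard:", text, "-> spoken to:", speak(text))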
# --- Main Logic ---
def run_chat_pipeline(audio_input, history, state_messages):
    """
    1. Transcribe audio -> update the UI with the user's text
    2. Query the LLM -> update the UI with the AI's text
    3. Generate audio -> auto-play the response
    """
    if llm is None:
        return history, state_messages, None

    # --- Step 1: user speech to text ---
    user_text = transcribe(audio_input)
    if not user_text:
        # Silence or an STT error: return the existing state unchanged.
        return history, state_messages, None

    # Update internal memory (standard OpenAI/Llama message format).
    state_messages.append({"role": "user", "content": user_text})
    # Update the UI history. The Chatbot below uses type="messages", so it
    # expects the same {"role": ..., "content": ...} dicts, not (user, bot) tuples.
    history.append({"role": "user", "content": user_text})

    # --- Step 2: LLM generation ---
    try:
        completion = llm.create_chat_completion(
            messages=state_messages,
            max_tokens=MAX_NEW_TOKENS,
            temperature=TEMPERATURE,
        )
        ai_text = completion["choices"][0]["message"]["content"]
    except Exception as e:
        ai_text = f"Error: {e}"

    # Update internal memory and the UI with the AI response.
    state_messages.append({"role": "assistant", "content": ai_text})
    history.append({"role": "assistant", "content": ai_text})

    # --- Step 3: text to speech ---
    audio_path = speak(ai_text)

    # Return: updated chatbot UI, updated internal state, audio file.
    return history, state_messages, audio_path
# --- Gradio UI Layout ---
with gr.Blocks(title="Voice Chatbot") as demo:
    gr.Markdown("## 🎙️ Voice-First AI Chat")

    # 1. Visual conversation history (the "screen").
    chatbot = gr.Chatbot(
        label="Conversation",
        type="messages",  # history entries are {"role": ..., "content": ...} dicts
        height=500,
    )

    # 2. State (hidden memory). Stores [{"role": "user", "content": "..."}, ...].
    state_messages = gr.State([])

    # 3. Audio interaction area.
    with gr.Row():
        with gr.Column(scale=4):
            # Input microphone.
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="Record Your Message",
            )
        with gr.Column(scale=1):
            # Send and clear buttons.
            submit_btn = gr.Button("Send Voice 💬", variant="primary")
            clear_btn = gr.Button("Clear Chat 🗑️")

    # 4. Output audio for autoplay. It could be hidden with visible=False to
    # reduce clutter, but some browsers block autoplay from hidden components,
    # so it is kept visible.
    audio_player = gr.Audio(
        label="AI Voice",
        autoplay=True,
        visible=True,
        interactive=False,
    )

    # --- Event Wiring ---
    submit_btn.click(
        fn=run_chat_pipeline,
        inputs=[audio_input, chatbot, state_messages],
        outputs=[chatbot, state_messages, audio_player],
    )

    # Clear logic: empty the chat UI, the internal memory, and the audio player.
    def clear_all():
        return [], [], None

    clear_btn.click(
        fn=clear_all,
        inputs=None,
        outputs=[chatbot, state_messages, audio_player],
    )
if __name__ == "__main__":
    demo.launch()
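# To run locally (assuming the imports above map to these PyPI packages):
#   pip install gradio llama-cpp-python huggingface_hub deepgram-sdk
#   DEEPGRAM_API_KEY=... python app.py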