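"""Voice-first chatbot: Deepgram speech I/O around a local GGUF Llama model.

Pipeline: microphone audio -> Deepgram Nova-2 transcription -> llama.cpp
chat completion -> Deepgram Aura speech synthesis, wired up in a Gradio UI.
"""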
import os
import time

import gradio as gr
from deepgram import DeepgramClient, PrerecordedOptions, SpeakOptions
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# --- Configuration ---
DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY") # Ensure this is set in Space Settings
REPO_ID = "Kezovic/iris-q4gguf-v2"
FILENAME = "llama-3.2-1b-instruct.Q4_K_M.gguf"
CONTEXT_WINDOW = 4096
MAX_NEW_TOKENS = 512
TEMPERATURE = 0.7
# --- Initialize Deepgram ---
if not DEEPGRAM_API_KEY:
    print("Error: DEEPGRAM_API_KEY is missing.")
    deepgram = None
else:
    deepgram = DeepgramClient(DEEPGRAM_API_KEY)
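# Note: the listen.rest / speak.rest call style used below assumes a v3+
# Python SDK (deepgram-sdk); older SDK releases expose a different surface.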
# --- Load LLM ---
llm = None
def load_llm():
    global llm
    print("Downloading LLM...")
    try:
        model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
        llm = Llama(
            model_path=model_path,
            n_ctx=CONTEXT_WINDOW,
            n_threads=2,
            verbose=False,
        )
        print("LLM loaded!")
    except Exception as e:
        print(f"Error loading model: {e}")
load_llm()
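# Quick sanity check for local debugging (a sketch; the prompt string and
# max_tokens value here are illustrative, not part of the app):
#   if llm is not None:
#       out = llm.create_chat_completion(
#           messages=[{"role": "user", "content": "Say hello."}], max_tokens=16
#       )
#       print(out["choices"][0]["message"]["content"])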
# --- Helper Functions ---
def transcribe(audio_path):
    """Converts speech to text using Deepgram Nova-2."""
    if not audio_path or deepgram is None:
        return None
    try:
        with open(audio_path, "rb") as buffer:
            payload = {"buffer": buffer}
            options = PrerecordedOptions(smart_format=True, model="nova-2", language="en-US")
            response = deepgram.listen.rest.v("1").transcribe_file(payload, options)
        return response.results.channels[0].alternatives[0].transcript
    except Exception as e:
        print(f"STT Error: {e}")
        return None
def speak(text):
    """Converts text to speech using Deepgram Aura."""
    if not text or deepgram is None:
        return None
    try:
        # linear16 audio in a WAV container, so name the file accordingly.
        filename = f"response_{int(time.time())}.wav"
        options = SpeakOptions(model="aura-asteria-en", encoding="linear16", container="wav")
        deepgram.speak.rest.v("1").save(filename, {"text": text}, options)
        return filename
    except Exception as e:
        print(f"TTS Error: {e}")
        return None
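# Local round-trip check for the two helpers (a sketch; "sample.wav" is a
# placeholder path, and DEEPGRAM_API_KEY must be set for either call to work):
#   text = transcribe("sample.wav")
#   if text:
#       print("Heard:", text, "-> spoken to:", speak(text))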
# --- Main Logic ---
def run_chat_pipeline(audio_input, history, state_messages):
    """
    1. Transcribe audio -> update the UI with the user's text
    2. Query the LLM -> update the UI with the AI's text
    3. Generate audio -> auto-play the response
    """
    if llm is None:
        return history, state_messages, None

    # --- Step 1: user speech to text ---
    user_text = transcribe(audio_input)
    if not user_text:
        # Silence or an STT error: return the existing state unchanged.
        return history, state_messages, None

    # Update internal memory (standard OpenAI/Llama message format).
    state_messages.append({"role": "user", "content": user_text})
    # Update the UI history. The Chatbot below uses type="messages", so it
    # expects the same {"role": ..., "content": ...} dicts, not (user, bot) tuples.
    history.append({"role": "user", "content": user_text})

    # --- Step 2: LLM generation ---
    try:
        completion = llm.create_chat_completion(
            messages=state_messages,
            max_tokens=MAX_NEW_TOKENS,
            temperature=TEMPERATURE,
        )
        ai_text = completion["choices"][0]["message"]["content"]
    except Exception as e:
        ai_text = f"Error: {e}"

    # Update internal memory and the UI with the AI response.
    state_messages.append({"role": "assistant", "content": ai_text})
    history.append({"role": "assistant", "content": ai_text})

    # --- Step 3: text to speech ---
    audio_path = speak(ai_text)

    # Return: updated chatbot UI, updated internal state, audio file.
    return history, state_messages, audio_path
# --- Gradio UI Layout ---
with gr.Blocks(title="Voice Chatbot") as demo:
    gr.Markdown("## 🎙️ Voice-First AI Chat")

    # 1. Visual conversation history (the "screen").
    chatbot = gr.Chatbot(
        label="Conversation",
        type="messages",  # history entries are {"role": ..., "content": ...} dicts
        height=500,
    )

    # 2. State (hidden memory). Stores [{"role": "user", "content": "..."}, ...].
    state_messages = gr.State([])

    # 3. Audio interaction area.
    with gr.Row():
        with gr.Column(scale=4):
            # Input microphone.
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="Record Your Message",
            )
        with gr.Column(scale=1):
            # Send and clear buttons.
            submit_btn = gr.Button("Send Voice 💬", variant="primary")
            clear_btn = gr.Button("Clear Chat 🗑️")

    # 4. Output audio for autoplay. It could be hidden with visible=False to
    # reduce clutter, but some browsers block autoplay from hidden components,
    # so it is kept visible.
    audio_player = gr.Audio(
        label="AI Voice",
        autoplay=True,
        visible=True,
        interactive=False,
    )

    # --- Event Wiring ---
    submit_btn.click(
        fn=run_chat_pipeline,
        inputs=[audio_input, chatbot, state_messages],
        outputs=[chatbot, state_messages, audio_player],
    )

    # Clear logic: empty the chat UI, the internal memory, and the audio player.
    def clear_all():
        return [], [], None

    clear_btn.click(
        fn=clear_all,
        inputs=None,
        outputs=[chatbot, state_messages, audio_player],
    )
if __name__ == "__main__":
    demo.launch()
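# To run locally (assuming the imports above map to these PyPI packages):
#   pip install gradio llama-cpp-python huggingface_hub deepgram-sdk
#   DEEPGRAM_API_KEY=... python app.py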