Spaces:

gzsol
/

lab2

Sleeping

lab2 / app.py

zsolnai

Add oscar gradio

10da12a 3 months ago

9.81 kB

	import os
	import re
	import tempfile

	import gradio as gr
	import numpy as np
	import soundfile as sf
	import torch
	from ddgs import DDGS
	from huggingface_hub import hf_hub_download
	from llama_cpp import Llama
	from transformers import pipeline
	from TTS.api import TTS

	# --- Device Setup ---
	# Every model below is loaded for CPU inference.
	device = "cpu"

	# --- 1. STT Setup (Whisper) ---
	# Speech-to-text: a Hugging Face ASR pipeline wrapping the tiny English Whisper model.
	print("Loading Whisper...")
	STT_MODEL_NAME = "openai/whisper-tiny.en"
	stt_pipe = pipeline(
	    "automatic-speech-recognition",
	    model=STT_MODEL_NAME,
	    device=device,
	)

	# --- 2. LLM Setup (Llama.cpp) ---
	# The GGUF weights are fetched from the Hub; HF_TOKEN (if set) is used for the download.
	print("Setting up Llama.cpp...")
	HF_API_TOKEN = os.getenv("HF_TOKEN")

	print("Downloading gzsol/model_1b GGUF...")
	model_path = hf_hub_download(repo_id="gzsol/model_1b", filename="model.gguf", token=HF_API_TOKEN)

	# Startup diagnostics: report where the file landed and how big it is.
	model_exists = os.path.exists(model_path)
	print(f"Model path: {model_path}")
	print(f"File exists: {model_exists}")
	if model_exists:
	    model_size_bytes = os.path.getsize(model_path)
	    print(f"File size: {model_size_bytes} bytes")
	    print(f"File size: {model_size_bytes / (1024**3):.2f} GiB")

	# n_gpu_layers=0 keeps all layers on the CPU; n_ctx=2048 is the context window
	# passed to llama.cpp.
	print(f"Loading model from {model_path}...")
	llm = Llama(
	    model_path=model_path,
	    n_gpu_layers=0,
	    n_ctx=2048,
	)

	# --- 3. TTS Setup (Coqui) ---
	# Text-to-speech: Coqui TTS with the LJSpeech Tacotron2-DDC voice.
	print("Loading TTS...")
	TTS_MODEL_NAME = "tts_models/en/ljspeech/tacotron2-DDC"
	tts_model = TTS(model_name=TTS_MODEL_NAME, progress_bar=False)


	# --- Core Functions ---
	# Words/phrases that suggest the user is asking for up-to-date information.
	_SEARCH_KEYWORDS = (
	    "current",
	    "latest",
	    "recent",
	    "today",
	    "now",
	    "news",
	    "weather",
	    "price",
	    "2024",
	    "2025",
	    "what is happening",
	    "score",
	    "match",
	)

	# Whole-word matching (with simple "s"/"es"/"ly" suffixes allowed, e.g. "prices",
	# "matches", "currently"). A plain substring test would fire on unrelated words
	# such as "know" or "snow" (both contain "now").
	_SEARCH_KEYWORD_RE = re.compile(
	    r"\b(?:"
	    + "|".join(re.escape(keyword) for keyword in _SEARCH_KEYWORDS)
	    + r")(?:s|es|ly)?\b",
	    re.IGNORECASE,
	)


	def get_web_context(message, max_results=3):
	    """Return a short web-search digest for time-sensitive questions.

	    The message is only searched when it contains one of ``_SEARCH_KEYWORDS``
	    as a whole word; otherwise no network call is made.

	    Args:
	        message: The user's question; also used verbatim as the search query.
	        max_results: Maximum number of search results to include (default 3).

	    Returns:
	        A string starting with "Current information from web search:" followed
	        by one "- title: body..." line per result, or ``None`` when the message
	        does not look time-sensitive, the search returns nothing, or the search
	        fails.
	    """
	    if not message or not _SEARCH_KEYWORD_RE.search(message):
	        return None

	    # Best effort: any search failure is logged and treated as "no context"
	    # so the chat can still answer without web data.
	    try:
	        with DDGS() as ddgs:
	            results = list(ddgs.text(message, max_results=max_results))

	        if not results:
	            print("No search results found")
	            return None

	        print(f"Found {len(results)} results:")
	        lines = ["Current information from web search:"]
	        for index, result in enumerate(results, start=1):
	            # Tolerate results with a missing or empty title/body instead of
	            # discarding the whole search.
	            title = result.get("title") or ""
	            body = result.get("body") or ""
	            print(f"Result {index}: {title}")
	            print(f" Body: {body[:100]}...")
	            lines.append(f"- {title}: {body[:200]}...")

	        return "\n".join(lines) + "\n"

	    except Exception as e:
	        print(f"Search error: {e}")
	        return None


	def _content_to_text(content):
	    """Flatten a chat message's ``content`` into plain text for the prompt.

	    Handles plain strings, a single ``{"text": ...}`` part, and lists/tuples of
	    such parts. Parts without a string ``"text"`` entry contribute nothing.
	    """
	    if content is None:
	        return ""
	    if isinstance(content, str):
	        return content
	    if isinstance(content, dict):
	        text = content.get("text")
	        return text if isinstance(text, str) else ""
	    if isinstance(content, (list, tuple)):
	        pieces = (_content_to_text(part) for part in content)
	        return " ".join(piece for piece in pieces if piece)
	    return str(content)


	def _clean_response(text):
	    """Strip quotes, trailing punctuation and echoed structure debris from model output."""
	    text = text.strip()

	    text = text.strip("'\"")
	    text = text.rstrip(",:;")
	    text = text.strip("'\"")
	    # Drop long runs of "1.2.3.4..." style digit/dot sequences.
	    text = re.sub(r"(\d+\.){10,}", "", text)

	    # Keep only the assistant's own turn if the model started writing the next one.
	    if "User:" in text:
	        text = text.split("User:")[0].strip()

	    # Remove fragments of a stringified message structure echoed by the model.
	    text = text.replace("[{", "").replace("}]", "")
	    text = text.replace("'text':", "").replace('"text":', "")
	    text = text.replace("'type': 'text'", "").replace('"type": "text"', "")

	    if ", 'type'" in text or ', "type"' in text:
	        text = text.split(", 'type'")[0].split(', "type"')[0].strip()

	    # One final strip
	    return text.strip("'\",:;")


	def chat_with_bot(message, history):
	    """Generate an assistant reply for ``message`` and record the turn in ``history``.

	    When ``get_web_context`` returns search context, the prompt asks the model
	    to answer only from that context (prior turns are not included). Otherwise
	    the prompt is the prior conversation followed by the new user message.

	    Args:
	        message: The user's text. ``None``, empty or whitespace-only input is a no-op.
	        history: List of ``{"role": ..., "content": ...}`` dicts, or ``None``.
	            It is mutated in place: the user message and the reply are appended.

	    Returns:
	        ``(history, reply_text)``. On failure the reply is an
	        "Error generating response: ..." message, which is also appended to
	        the history. For empty input, ``reply_text`` is ``""``.
	    """
	    if history is None:
	        history = []

	    if not message or not message.strip():
	        return history, ""

	    try:
	        web_context = get_web_context(message=message)

	        # Build conversation context from history. Content is flattened to
	        # plain text first so that structured content (a list of
	        # {"type": "text", "text": ...} parts) does not end up in the prompt
	        # as a Python repr.
	        conversation = ""
	        for entry in history:
	            role = "User" if entry.get("role") == "user" else "Assistant"
	            conversation += f"{role}: {_content_to_text(entry.get('content', ''))}\n"

	        # Create a clearer prompt with system instruction
	        if web_context:
	            prompt = f"""Answer ONLY using this information:

	{web_context}

	Question: {message}
	Answer:"""
	            print("The web context has been added to the prompt")
	        else:
	            prompt = f"""You are a helpful assistant. Answer naturally and conversationally.
	{conversation}User: {message}
	Assistant:"""

	        print("Generating response with Llama...")

	        # Generate response with stricter settings; stop as soon as the model
	        # begins a new "User:" turn.
	        response = llm(
	            prompt,
	            max_tokens=200,
	            temperature=0.7,
	            top_p=0.95,
	            stop=["User:", "\nUser:"],
	        )

	        response_str = _clean_response(response["choices"][0]["text"])

	        if not response_str:
	            response_str = "I received an empty response. Please try again."
	            print("Warning: Empty response from LLM")

	        history.append({"role": "user", "content": message})
	        history.append({"role": "assistant", "content": response_str})

	        return history, response_str

	    except Exception as e:
	        # Top-level boundary for the UI: log the full traceback and surface a
	        # readable error in the chat instead of crashing the handler.
	        import traceback

	        error_trace = traceback.format_exc()
	        print(f"LLM Error: {e}")
	        print(f"Full traceback:\n{error_trace}")

	        error_msg = f"Error generating response: {str(e) or 'Unknown error occurred'}"

	        history.append({"role": "user", "content": message})
	        history.append({"role": "assistant", "content": error_msg})
	        return history, error_msg


	def text_to_speech_from_chat(chat_response):
	    """Synthesize the chat reply to a temporary WAV file.

	    Returns ``(wav_path, status_message)``. ``wav_path`` is ``None`` when the
	    reply is empty, starts with "Error", or synthesis fails; in the failure
	    case any temp file that was already created is removed.
	    """
	    speakable = bool(chat_response) and not chat_response.startswith("Error")
	    if not speakable:
	        return None, "No valid response to synthesize."

	    output_path = None
	    try:
	        # Reserve a .wav path; delete=False keeps the file after the handle closes
	        # so the TTS engine can write to it.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_handle:
	            output_path = wav_handle.name

	        tts_model.tts_to_file(text=chat_response, file_path=output_path)
	    except Exception as e:
	        # Do not leave a partial or empty file behind on failure.
	        if output_path and os.path.exists(output_path):
	            os.remove(output_path)
	        return None, f"Error during TTS: {e}"

	    return output_path, "Speech synthesis complete."


	def speech_to_text_and_chat(audio_file_path, history):
	    """Run the full voice pipeline: transcribe, chat, then synthesize speech.

	    Returns a 5-tuple ``(transcription, history, reply_text, audio_path, status)``
	    in the order the voice tab's output components expect. When no audio is
	    given or transcription fails, the first and last slots carry the message
	    and the history is returned untouched.
	    """
	    if audio_file_path is None:
	        return "Please upload an audio file.", history, "", None, "Awaiting input."

	    # Step 1: speech -> text
	    try:
	        transcribed_text = stt_pipe(audio_file_path)["text"]
	    except Exception as e:
	        stt_error = f"Error during STT: {e}"
	        return stt_error, history, "", None, stt_error

	    # Step 2: text -> reply from the GGUF model
	    new_history, reply_text = chat_with_bot(transcribed_text, history)

	    # Step 3: reply -> speech
	    wav_path, tts_status = text_to_speech_from_chat(reply_text)

	    return transcribed_text, new_history, reply_text, wav_path, tts_status


	# --- Gradio Interface ---
	# NOTE(review): custom_css is defined here but is not passed to Gradio anywhere
	# in this file, so these rules are currently not applied.
	custom_css = """
	#status { font-weight: bold; color: #2563eb; }
	.chatbot { height: 400px; }
	"""

	with gr.Blocks() as demo:
	    gr.Markdown("# 🗣️ GGUF Voice Assistant (Running your model_1b)")
	    gr.Markdown("Note: This app uses `gzsol/model_1b` (GGUF) on CPU.")

	    # No separate conversation state is kept: each Chatbot component's value is
	    # the history, and chat_with_bot rebuilds the prompt from it on every call.

	    with gr.Tabs():

	        # --- TAB 1: FULL VOICE CHAT ---
	        with gr.TabItem("🗣️ Voice Assistant"):
	            # chat_with_bot reads and appends {"role", "content"} dicts, so this
	            # Chatbot is expected to use the role/content messages format.
	            voice_chat_history = gr.Chatbot(
	                label="Conversation Log",
	                elem_classes=["chatbot"],
	                value=[],
	            )

	            with gr.Row():
	                audio_in = gr.Audio(
	                    sources=["microphone", "upload"],
	                    type="filepath",
	                    label="Input Audio",
	                )
	                voice_audio_out = gr.Audio(label="AI Voice Response", autoplay=True)

	            voice_transcription = gr.Textbox(label="User Transcription")
	            voice_response_text = gr.Textbox(label="AI Response (Text)")
	            voice_status = gr.Textbox(elem_id="status", label="Status")

	            run_btn = gr.Button("Transcribe, Chat & Speak", variant="primary")
	            clear_voice_btn = gr.Button("Clear")

	            # Output order must match the tuple returned by speech_to_text_and_chat.
	            run_btn.click(
	                fn=speech_to_text_and_chat,
	                inputs=[audio_in, voice_chat_history],
	                outputs=[
	                    voice_transcription,
	                    voice_chat_history,
	                    voice_response_text,
	                    voice_audio_out,
	                    voice_status,
	                ],
	            )

	            # Reset every widget in the tab, including the transcription box,
	            # so no stale transcript is left behind after clearing.
	            clear_voice_btn.click(
	                lambda: (None, [], "", "", None, ""),
	                None,
	                [
	                    audio_in,
	                    voice_chat_history,
	                    voice_transcription,
	                    voice_response_text,
	                    voice_audio_out,
	                    voice_status,
	                ],
	            )

	        # --- TAB 2: TEXT CHAT ---
	        with gr.TabItem("💬 Text Chat"):
	            chatbot = gr.Chatbot(
	                label="Conversation",
	                elem_classes=["chatbot"],
	                value=[],
	            )
	            msg = gr.Textbox(label="Message")
	            submit_btn = gr.Button("Send")
	            clear_btn = gr.Button("Clear")

	            def chat_text_wrapper(message, history):
	                """Adapt chat_with_bot to a single Chatbot output (drops the reply text)."""
	                h, _ = chat_with_bot(message, history)
	                return h

	            # Enter key and Send button behave the same: update the chat, then
	            # empty the message box.
	            msg.submit(chat_text_wrapper, [msg, chatbot], [chatbot]).then(
	                lambda: "", None, msg
	            )
	            submit_btn.click(chat_text_wrapper, [msg, chatbot], [chatbot]).then(
	                lambda: "", None, msg
	            )
	            clear_btn.click(lambda: [], None, chatbot)

	demo.launch()