import os
import re
import tempfile
import gradio as gr
import numpy as np
import soundfile as sf
import torch
from ddgs import DDGS
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from transformers import pipeline
from TTS.api import TTS
# --- Device Setup ---
# Force CPU inference; no GPU is assumed to be available on this host.
device = "cpu"
# --- 1. STT Setup (Whisper) ---
print("Loading Whisper...")
# Smallest English-only Whisper checkpoint — keeps CPU transcription latency low.
STT_MODEL_NAME = "openai/whisper-tiny.en"
stt_pipe = pipeline("automatic-speech-recognition", model=STT_MODEL_NAME, device=device)
# --- 2. LLM Setup (Llama.cpp) ---
print("Setting up Llama.cpp...")
# Token is only required if the model repo is gated/private; may be None.
HF_API_TOKEN = os.getenv("HF_TOKEN")
print("Downloading gzsol/model_1b GGUF...")
# Downloads (or reuses the cached copy of) the GGUF weights from the Hub.
model_path = hf_hub_download(
    repo_id="gzsol/model_1b",
    filename="model.gguf",
    token=HF_API_TOKEN,
)
# Diagnostics: confirm the download landed and report its size.
print(f"Model path: {model_path}")
print(f"File exists: {os.path.exists(model_path)}")
if os.path.exists(model_path):
    print(f"File size: {os.path.getsize(model_path)} bytes")
    print(f"File size: {os.path.getsize(model_path) / (1024**3):.2f} GiB")
print(f"Loading model from {model_path}...")
# CPU-only load (n_gpu_layers=0) with a 2048-token context window.
llm = Llama(model_path=model_path, n_gpu_layers=0, n_ctx=2048)
# --- 3. TTS Setup (Coqui) ---
print("Loading TTS...")
# Single-speaker English LJSpeech voice (Tacotron2-DDC).
TTS_MODEL_NAME = "tts_models/en/ljspeech/tacotron2-DDC"
tts_model = TTS(model_name=TTS_MODEL_NAME, progress_bar=False)
# --- Core Functions ---
def get_web_context(message):
    """Return web-search context for *message*, or None when no search is needed.

    A DuckDuckGo text search is attempted only when the message looks
    time-sensitive (contains one of the trigger keywords below). Any search
    failure is deliberately swallowed — the chat continues without web data.

    Returns:
        str | None: a "Current information from web search:" block with up to
        three "- title: body..." lines, or None (no trigger / no results / error).
    """
    # Single-word triggers are matched on word boundaries so that e.g.
    # "know" no longer fires via its embedded "now", and "snow" stays inert.
    trigger_words = [
        "current",
        "latest",
        "recent",
        "today",
        "now",
        "news",
        "weather",
        "price",
        "2024",
        "2025",
        "score",
        "match",
    ]
    # Multi-word triggers keep substring matching.
    trigger_phrases = ["what is happening"]
    lowered = message.lower()
    words = set(re.findall(r"[a-z0-9]+", lowered))
    if not (
        any(w in words for w in trigger_words)
        or any(p in lowered for p in trigger_phrases)
    ):
        return None
    try:
        with DDGS() as ddgs:
            results = list(ddgs.text(message, max_results=3))
        if not results:
            print("No search results found")
            return None
        print(f"Found {len(results)} results:")
        context = "Current information from web search:\n"
        for i, result in enumerate(results):
            print(f"Result {i+1}: {result['title']}")
            print(f" Body: {result['body'][:100]}...")
            context += f"- {result['title']}: {result['body'][:200]}...\n"
        return context
    except Exception as e:
        # Best-effort: a failed search must never break the chat flow.
        print(f"Search error: {e}")
        return None
def _history_to_transcript(history):
    """Render prior chat turns as a plain "User:/Assistant:" transcript."""
    transcript = ""
    for turn in history:
        role = "User" if turn.get("role") == "user" else "Assistant"
        transcript += f"{role}: {turn.get('content', '')}\n"
    return transcript


def _clean_response(raw_text):
    """Scrub quoting and JSON-ish artifacts the local model sometimes emits.

    The GGUF model occasionally wraps its answer in quotes, leaks
    "[{'text': ..., 'type': 'text'}]"-style structure, hallucinates the
    next "User:" turn, or loops on numbered dots; strip all of that.
    """
    text = raw_text.strip()
    text = text.strip("'\"")
    text = text.rstrip(",:;")
    text = text.strip("'\"")
    # Kill degenerate "1.2.3.4..." repetition loops (10+ numbered dots).
    text = re.sub(r"(\d+\.){10,}", "", text)
    # Drop anything after a hallucinated next user turn.
    if "User:" in text:
        text = text.split("User:")[0].strip()
    # Strip leaked message-structure markup.
    text = text.replace("[{", "").replace("}]", "")
    text = text.replace("'text':", "").replace('"text":', "")
    text = text.replace("'type': 'text'", "").replace('"type": "text"', "")
    if ", 'type'" in text or ', "type"' in text:
        text = text.split(", 'type'")[0].split(', "type"')[0].strip()
    # One final strip of stray quotes/punctuation.
    return text.strip("'\",:;")


def chat_with_bot(message, history):
    """Generate one assistant reply with the local llama.cpp model.

    Args:
        message: the user's new message; blank/None input is ignored.
        history: prior turns as {"role", "content"} dicts, or None.
            The list is mutated in place (two entries appended per turn).

    Returns:
        tuple[list, str]: (updated history, reply text). On failure the
        reply is an "Error generating response: ..." string — this
        function never raises.
    """
    if history is None:
        history = []
    if not message or not message.strip():
        return history, ""
    try:
        web_context = get_web_context(message=message)
        if web_context:
            # Time-sensitive question: answer strictly from search results.
            prompt = f"""Answer ONLY using this information:
{web_context}
Question: {message}
Answer:"""
            print("The web context has been added to the prompt")
        else:
            # Normal chat: replay the conversation so far for context.
            prompt = f"""You are a helpful assistant. Answer naturally and conversationally.
{_history_to_transcript(history)}User: {message}
Assistant:"""
        print("Generating response with Llama...")
        result = llm(
            prompt,
            max_tokens=200,
            temperature=0.7,
            top_p=0.95,
            stop=["User:", "\nUser:"],  # halt before inventing the next turn
        )
        response_str = _clean_response(result["choices"][0]["text"])
        if not response_str:
            response_str = "I received an empty response. Please try again."
            print("Warning: Empty response from LLM")
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": response_str})
        return history, response_str
    except Exception as e:
        import traceback

        print(f"LLM Error: {e}")
        print(f"Full traceback:\n{traceback.format_exc()}")
        error_msg = f"Error generating response: {str(e) if str(e) else 'Unknown error occurred'}"
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": error_msg})
        return history, error_msg
def text_to_speech_from_chat(chat_response):
    """Synthesize *chat_response* to a temporary WAV file.

    Returns:
        tuple: (wav_path, status_message). wav_path is None when the input
        is empty, is an error message, or synthesis fails.
    """
    if not chat_response or chat_response.startswith("Error"):
        return None, "No valid response to synthesize."
    wav_path = None
    try:
        # Reserve a persistent temp file name; Gradio serves it afterwards.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as handle:
            wav_path = handle.name
        tts_model.tts_to_file(
            text=chat_response,
            file_path=wav_path,
        )
        return wav_path, "Speech synthesis complete."
    except Exception as exc:
        # Don't leave an orphaned temp file behind on failure.
        if wav_path and os.path.exists(wav_path):
            os.remove(wav_path)
        return None, f"Error during TTS: {exc}"
def speech_to_text_and_chat(audio_file_path, history):
    """Full voice pipeline: transcribe audio, chat, then speak the reply.

    Returns:
        tuple: (transcription, updated_history, reply_text, reply_audio_path,
        status_message) — one value per Gradio output component.
    """
    if audio_file_path is None:
        return "Please upload an audio file.", history, "", None, "Awaiting input."
    # Step 1: speech -> text (Whisper).
    try:
        transcription = stt_pipe(audio_file_path)["text"]
    except Exception as exc:
        stt_error = f"Error during STT: {exc}"
        return stt_error, history, "", None, stt_error
    # Step 2: text -> reply (local GGUF model).
    updated_history, reply_text = chat_with_bot(transcription, history)
    # Step 3: reply -> speech (Coqui TTS).
    reply_audio, status = text_to_speech_from_chat(reply_text)
    return transcription, updated_history, reply_text, reply_audio, status
# --- Gradio Interface ---
custom_css = """
#status { font-weight: bold; color: #2563eb; }
.chatbot { height: 400px; }
"""
# FIX: custom_css was defined but never passed to gr.Blocks, so the
# #status / .chatbot rules had no effect. Wire it in via css=.
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("# 🗣️ GGUF Voice Assistant (Running your model_1b)")
    gr.Markdown("**Note:** This app uses `gzsol/model_1b` (GGUF) on CPU.")
    # llama_cpp keeps no server-side conversation state; context lives in the
    # Chatbot components' message lists, replayed into each prompt.
    with gr.Tabs():
        # --- TAB 1: FULL VOICE CHAT ---
        with gr.TabItem("🗣️ Voice Assistant"):
            # FIX: type="messages" matches the {"role", "content"} dicts that
            # chat_with_bot appends; the default tuple format would not.
            voice_chat_history = gr.Chatbot(
                label="Conversation Log",
                elem_classes=["chatbot"],
                value=[],
                type="messages",
            )
            with gr.Row():
                audio_in = gr.Audio(
                    sources=["microphone", "upload"],
                    type="filepath",
                    label="Input Audio",
                )
                voice_audio_out = gr.Audio(label="AI Voice Response", autoplay=True)
            voice_transcription = gr.Textbox(label="User Transcription")
            voice_response_text = gr.Textbox(label="AI Response (Text)")
            voice_status = gr.Textbox(elem_id="status", label="Status")
            run_btn = gr.Button("Transcribe, Chat & Speak", variant="primary")
            clear_voice_btn = gr.Button("Clear")
            run_btn.click(
                fn=speech_to_text_and_chat,
                inputs=[audio_in, voice_chat_history],
                outputs=[
                    voice_transcription,
                    voice_chat_history,
                    voice_response_text,
                    voice_audio_out,
                    voice_status,
                ],
            )
            # Reset every voice-tab component to its empty state.
            clear_voice_btn.click(
                lambda: (None, [], "", None, ""),
                None,
                [
                    audio_in,
                    voice_chat_history,
                    voice_response_text,
                    voice_audio_out,
                    voice_status,
                ],
            )
        # --- TAB 2: TEXT CHAT ---
        with gr.TabItem("💬 Text Chat"):
            chatbot = gr.Chatbot(
                label="Conversation",
                elem_classes=["chatbot"],
                value=[],
                type="messages",
            )
            msg = gr.Textbox(label="Message")
            submit_btn = gr.Button("Send")
            clear_btn = gr.Button("Clear")

            def chat_text_wrapper(message, history):
                """Adapt chat_with_bot for text-only use: keep only the history."""
                updated_history, _ = chat_with_bot(message, history)
                return updated_history

            # Enter key and Send button do the same thing, then clear the box.
            msg.submit(chat_text_wrapper, [msg, chatbot], [chatbot]).then(
                lambda: "", None, msg
            )
            submit_btn.click(chat_text_wrapper, [msg, chatbot], [chatbot]).then(
                lambda: "", None, msg
            )
            clear_btn.click(lambda: [], None, chatbot)
demo.launch()