# Source: Hugging Face Space "helloatithya" — app.py (commit 3f9f1f1, verified)
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import gradio as gr
from threading import Thread
from duckduckgo_search import DDGS
# --- Configuration ---
MODEL_ID = "helloatithya/Neura_Veltrixa"  # Hugging Face Hub repo to load from
GGUF_FILE = "neura_q4_k_m.gguf"  # Q4_K_M-quantized GGUF weight file inside that repo

# Load Model & Tokenizer
# NOTE(review): loading a GGUF file via transformers dequantizes the weights;
# torch.float32 is forced here — presumably for CPU inference. Confirm the
# dequantized model fits in the Space's memory budget.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    gguf_file=GGUF_FILE,
    torch_dtype=torch.float32,
    device_map="auto"  # let accelerate place layers on available devices
)
def web_search(query):
    """Return up to three DuckDuckGo snippets for *query*, one per line.

    Each line is formatted as ``[title]: body``. Failures are reported as a
    plain "Search failed: ..." string instead of raising, so callers can
    splice the result straight into a prompt without crashing.
    """
    try:
        with DDGS() as search_client:
            hits = list(search_client.text(query, max_results=3))
        snippet_lines = [f"[{hit['title']}]: {hit['body']}" for hit in hits]
        return "\n".join(snippet_lines)
    except Exception as err:
        return f"Search failed: {str(err)}"
def generate_response(message, history):
    """Stream an assistant reply for *message*, optionally augmented with web search.

    Args:
        message: Latest user message.
        history: Prior (user, assistant) turn pairs, as produced by gr.Chatbot.

    Yields:
        The accumulated reply text, growing as tokens are generated.
    """
    # --- AUTO-SEARCH LOGIC ---
    # Trigger a search when the message looks like it needs fresh facts.
    search_keywords = ("search", "who is", "current", "latest", "news", "weather", "today")
    context_text = ""
    lowered = message.lower()  # hoisted: lowercase once, not once per keyword
    if any(keyword in lowered for keyword in search_keywords):
        # Log prefix was mojibake (a mis-decoded magnifying-glass emoji) in the
        # original; replaced with plain ASCII so it renders in any console.
        print(f"[search] Searching the web for: {message}")
        context_text = web_search(message)

    # Construct the augmented prompt: system text, optional search context,
    # then the running conversation transcript.
    system_prompt = "You are Neura AI, a helpful assistant with internet access."
    if context_text:
        system_prompt += f"\n\nInternet Search Results:\n{context_text}\nUse the above info to answer accurately."
    prompt = f"System: {system_prompt}\n"
    for user_msg, bot_msg in history:
        prompt += f"User: {user_msg}\nAssistant: {bot_msg}\n"
    prompt += f"User: {message}\nAssistant:"

    # --- Generation ---
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id  # silence the missing-pad-token warning
    )
    # Run generate() on a worker thread so tokens can be streamed as they arrive.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text
    thread.join()  # fix: reap the worker thread once the stream is exhausted
# Utility for AI-generated titles
def get_chat_title(message):
    """Ask the model to compress *message* into a ~3-word chat title."""
    title_prompt = f"System: Summarize into 3 words. Message: {message}\nAssistant:"
    encoded = tokenizer(title_prompt, return_tensors="pt").to(model.device)
    generated = model.generate(**encoded, max_new_tokens=10)
    # Decode only the newly generated tokens, skipping the echoed prompt.
    prompt_len = encoded["input_ids"].shape[1]
    title = tokenizer.decode(generated[0][prompt_len:], skip_special_tokens=True)
    return title.strip()
# Gradio Setup (Same as before)
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Hidden components that exist only to expose named API endpoints
    # (api_name=...) to an external HTML frontend, not for interactive use.
    chat_api = gr.Chatbot()
    msg_input = gr.Textbox(visible=False)
    # Endpoints for HTML Frontend
    chat_btn = gr.Button("Submit", visible=False)
    # NOTE(review): generate_response yields a plain string, while a Chatbot
    # output normally expects a list of (user, assistant) pairs — presumably
    # the external frontend consumes the raw stream via the API instead of
    # rendering this Chatbot. Confirm against the frontend code.
    chat_btn.click(generate_response, [msg_input, chat_api], [chat_api], api_name="chat")
    title_btn = gr.Button("Get Title", visible=False)
    # NOTE(review): the output gr.Textbox() is instantiated inline inside the
    # Blocks context and is therefore visible by default — verify it renders
    # as intended (or should be visible=False like the inputs).
    title_btn.click(get_chat_title, [msg_input], [gr.Textbox()], api_name="get_title")

# queue() is required so the generator-based chat endpoint can stream.
demo.queue().launch()