# Source: Hugging Face Space "helloatithya" — app.py (commit 3f9f1f1, verified)
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import gradio as gr
from threading import Thread
from duckduckgo_search import DDGS
# --- Configuration ---
MODEL_ID = "helloatithya/Neura_Veltrixa"  # Hugging Face Hub repo to load from
GGUF_FILE = "neura_q4_k_m.gguf"  # Q4_K_M-quantized GGUF weight file inside that repo

# Load Model & Tokenizer
# NOTE(review): loading a GGUF file via transformers dequantizes the weights;
# torch.float32 is forced here — presumably for CPU inference. Confirm the
# dequantized model fits in the Space's memory budget.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    gguf_file=GGUF_FILE,
    torch_dtype=torch.float32,
    device_map="auto"  # let accelerate place layers on available devices
)
def web_search(query):
    """Return up to three DuckDuckGo snippets for *query*, one per line.

    Each line is formatted as ``[title]: body``. Failures are reported as a
    plain "Search failed: ..." string instead of raising, so callers can
    splice the result straight into a prompt without crashing.
    """
    try:
        with DDGS() as search_client:
            hits = list(search_client.text(query, max_results=3))
        snippet_lines = [f"[{hit['title']}]: {hit['body']}" for hit in hits]
        return "\n".join(snippet_lines)
    except Exception as err:
        return f"Search failed: {str(err)}"
def generate_response(message, history):
    """Stream an assistant reply for *message*, optionally augmented with web search.

    Args:
        message: Latest user message.
        history: Prior (user, assistant) turn pairs, as produced by gr.Chatbot.

    Yields:
        The accumulated reply text, growing as tokens are generated.
    """
    # --- AUTO-SEARCH LOGIC ---
    # Trigger a search when the message looks like it needs fresh facts.
    search_keywords = ("search", "who is", "current", "latest", "news", "weather", "today")
    context_text = ""
    lowered = message.lower()  # hoisted: lowercase once, not once per keyword
    if any(keyword in lowered for keyword in search_keywords):
        # Log prefix was mojibake (a mis-decoded magnifying-glass emoji) in the
        # original; replaced with plain ASCII so it renders in any console.
        print(f"[search] Searching the web for: {message}")
        context_text = web_search(message)

    # Construct the augmented prompt: system text, optional search context,
    # then the running conversation transcript.
    system_prompt = "You are Neura AI, a helpful assistant with internet access."
    if context_text:
        system_prompt += f"\n\nInternet Search Results:\n{context_text}\nUse the above info to answer accurately."
    prompt = f"System: {system_prompt}\n"
    for user_msg, bot_msg in history:
        prompt += f"User: {user_msg}\nAssistant: {bot_msg}\n"
    prompt += f"User: {message}\nAssistant:"

    # --- Generation ---
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id  # silence the missing-pad-token warning
    )
    # Run generate() on a worker thread so tokens can be streamed as they arrive.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text
    thread.join()  # fix: reap the worker thread once the stream is exhausted
# Utility for AI-generated titles
def get_chat_title(message):
    """Ask the model to compress *message* into a ~3-word chat title."""
    title_prompt = f"System: Summarize into 3 words. Message: {message}\nAssistant:"
    encoded = tokenizer(title_prompt, return_tensors="pt").to(model.device)
    generated = model.generate(**encoded, max_new_tokens=10)
    # Decode only the newly generated tokens, skipping the echoed prompt.
    prompt_len = encoded["input_ids"].shape[1]
    title = tokenizer.decode(generated[0][prompt_len:], skip_special_tokens=True)
    return title.strip()
# Gradio Setup (Same as before)
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Hidden components that exist only to expose named API endpoints
    # (api_name=...) to an external HTML frontend, not for interactive use.
    chat_api = gr.Chatbot()
    msg_input = gr.Textbox(visible=False)
    # Endpoints for HTML Frontend
    chat_btn = gr.Button("Submit", visible=False)
    # NOTE(review): generate_response yields a plain string, while a Chatbot
    # output normally expects a list of (user, assistant) pairs — presumably
    # the external frontend consumes the raw stream via the API instead of
    # rendering this Chatbot. Confirm against the frontend code.
    chat_btn.click(generate_response, [msg_input, chat_api], [chat_api], api_name="chat")
    title_btn = gr.Button("Get Title", visible=False)
    # NOTE(review): the output gr.Textbox() is instantiated inline inside the
    # Blocks context and is therefore visible by default — verify it renders
    # as intended (or should be visible=False like the inputs).
    title_btn.click(get_chat_title, [msg_input], [gr.Textbox()], api_name="get_title")

# queue() is required so the generator-based chat endpoint can stream.
demo.queue().launch()