import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import gradio as gr
from threading import Thread
from duckduckgo_search import DDGS

# --- Configuration ---
MODEL_ID = "helloatithya/Neura_Veltrixa"
GGUF_FILE = "neura_q4_k_m.gguf"

# Load Model & Tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    gguf_file=GGUF_FILE,
    torch_dtype=torch.float32,
    device_map="auto"
)
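
# Note (assumption about the environment): loading a GGUF checkpoint through
# transformers also requires the `gguf` package to be installed
# (pip install gguf); the weights are dequantized to the requested
# torch_dtype (float32 here), which keeps the model CPU-friendly.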

def web_search(query):
    """Fetches top 3 snippets from DuckDuckGo."""
    try:
        with DDGS() as ddgs:
            results = list(ddgs.text(query, max_results=3))
        context = "\n".join(f"[{r['title']}]: {r['body']}" for r in results)
        return context
    except Exception as e:
        return f"Search failed: {str(e)}"

def generate_response(message, history):
    # --- AUTO-SEARCH LOGIC ---
    # Trigger a web search if the message contains keywords like
    # "search", "who is", "latest", or "news"
    search_keywords = ["search", "who is", "current", "latest", "news", "weather", "today"]
    context_text = ""
    if any(k in message.lower() for k in search_keywords):
        print(f"Searching the web for: {message}")
        context_text = web_search(message)

    # Construct augmented prompt
    system_prompt = "You are Neura AI, a helpful assistant with internet access."
    if context_text:
        system_prompt += f"\n\nInternet Search Results:\n{context_text}\nUse the above info to answer accurately."

    prompt = f"System: {system_prompt}\n"
    for user_msg, bot_msg in history:
        prompt += f"User: {user_msg}\nAssistant: {bot_msg}\n"
    prompt += f"User: {message}\nAssistant:"

    # --- Generation ---
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id
    )
    # Run generation in a background thread so tokens can be streamed as they arrive
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    # Stream the growing reply; yield the full updated history so the
    # Chatbot output component receives data in the format it expects.
    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield history + [(message, partial_text)]

# Utility for AI-generated titles
def get_chat_title(message):
    prompt = f"System: Summarize into 3 words. Message: {message}\nAssistant:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=10)
    title = tokenizer.decode(output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
    return title.strip()

# Gradio Setup
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    chat_api = gr.Chatbot()  # chat history handled as (user, assistant) tuples
    msg_input = gr.Textbox(visible=False)
    title_output = gr.Textbox(visible=False)

    # Endpoints for HTML Frontend
    chat_btn = gr.Button("Submit", visible=False)
    chat_btn.click(generate_response, [msg_input, chat_api], [chat_api], api_name="chat")

    title_btn = gr.Button("Get Title", visible=False)
    title_btn.click(get_chat_title, [msg_input], [title_output], api_name="get_title")

demo.queue().launch()
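
# A minimal client-side sketch (assumptions: the Space's public id and an
# installed gradio_client; the Space name below is hypothetical) showing how
# the hidden endpoints above can be called by name:
#
#   from gradio_client import Client
#   client = Client("helloatithya/neura-space")   # hypothetical Space id
#   reply = client.predict("latest AI news", [], api_name="/chat")
#   title = client.predict("latest AI news", api_name="/get_title")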