"""Neura AI chat application.

Loads a GGUF-quantized causal LM via Hugging Face transformers, optionally
augments prompts with live DuckDuckGo search results, and serves a streaming
chat endpoint through Gradio (consumed by an external HTML frontend via the
named API endpoints ``chat`` and ``get_title``).
"""

from threading import Thread

import gradio as gr
import torch
from duckduckgo_search import DDGS
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# --- Configuration ---
MODEL_ID = "helloatithya/Neura_Veltrixa"
GGUF_FILE = "neura_q4_k_m.gguf"

# Load model & tokenizer once at startup (downloads from the Hub on first run).
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    gguf_file=GGUF_FILE,
    torch_dtype=torch.float32,
    device_map="auto",
)


def web_search(query):
    """Fetch the top 3 DuckDuckGo text snippets for *query*.

    Returns a newline-joined ``[title]: body`` context string. Best-effort:
    on any failure it returns an error-message string instead of raising,
    so generation can proceed without search context.
    """
    try:
        with DDGS() as ddgs:
            results = list(ddgs.text(query, max_results=3))
        context = "\n".join([f"[{r['title']}]: {r['body']}" for r in results])
        return context
    except Exception as e:
        return f"Search failed: {str(e)}"


def generate_response(message, history):
    """Stream a model reply for *message*, given prior (user, bot) *history*.

    Yields the partial assistant text as it is generated. If the message
    contains a trigger keyword, DuckDuckGo results are injected into the
    system prompt before generation.
    """
    # --- AUTO-SEARCH LOGIC ---
    # Trigger a web search when the message looks time-sensitive or factual.
    search_keywords = ["search", "who is", "current", "latest", "news", "weather", "today"]
    context_text = ""
    if any(k in message.lower() for k in search_keywords):
        print(f"🔍 Searching the web for: {message}")
        context_text = web_search(message)

    # Construct the augmented system prompt.
    system_prompt = "You are Neura AI, a helpful assistant with internet access."
    if context_text:
        system_prompt += f"\n\nInternet Search Results:\n{context_text}\nUse the above info to answer accurately."

    # Flatten the conversation into a plain User/Assistant transcript.
    prompt = f"System: {system_prompt}\n"
    for user_msg, bot_msg in history:
        prompt += f"User: {user_msg}\nAssistant: {bot_msg}\n"
    prompt += f"User: {message}\nAssistant:"

    # --- Generation (streamed from a background thread) ---
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
    )
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text


def get_chat_title(message):
    """Generate a short (≈3-word) AI title for *message*.

    Decodes only the tokens generated after the prompt.
    """
    prompt = f"System: Summarize into 3 words. Message: {message}\nAssistant:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=10)
    # Slice off the prompt tokens so only the new completion is decoded.
    title = tokenizer.decode(output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
    return title.strip()


# Gradio setup: hidden components expose named API endpoints for the HTML frontend.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    chat_api = gr.Chatbot()
    msg_input = gr.Textbox(visible=False)

    # NOTE(review): generate_response yields a plain string but is wired to a
    # gr.Chatbot output, which normally expects a list of (user, bot) pairs —
    # presumably the external HTML frontend consumes the raw stream via the
    # "chat" API endpoint; confirm before changing.
    chat_btn = gr.Button("Submit", visible=False)
    chat_btn.click(generate_response, [msg_input, chat_api], [chat_api], api_name="chat")

    title_btn = gr.Button("Get Title", visible=False)
    title_btn.click(get_chat_title, [msg_input], [gr.Textbox()], api_name="get_title")

if __name__ == "__main__":
    # Queueing enables generator (streaming) endpoints.
    demo.queue().launch()