"""Creeper AI Chatbot: a stateless, streaming Gradio chat UI for LiquidAI LFM models.

Each message is sent to the model with no conversation history ("no memory"),
and tokens are streamed back into the chat view as they are generated.
"""

from threading import Thread

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

# Display name -> Hugging Face model id.
MODEL_NAMES = {
    "LFM 350M": "LiquidAI/LFM2-350M",
    "LFM 700M": "LiquidAI/LFM2-700M",
    "LFM 1.2B": "LiquidAI/LFM2-1.2B",
}

# Cache of loaded (tokenizer, model) pairs keyed by display name, so that
# switching models in the dropdown never re-loads weights from disk.
model_cache = {}


def load_model(model_key):
    """Return a ``(tokenizer, model)`` pair for *model_key*, loading it on first use.

    Args:
        model_key: A key of ``MODEL_NAMES`` (e.g. ``"LFM 1.2B"``).

    Returns:
        Tuple of ``(tokenizer, model)``; the model is placed on CUDA in fp16
        when a GPU is available, otherwise on CPU in fp32.

    Raises:
        KeyError: If *model_key* is not a known model name.
    """
    if model_key in model_cache:
        return model_cache[model_key]

    model_name = MODEL_NAMES[model_key]
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        # NOTE(review): `dtype=` requires transformers >= 4.56 (older releases
        # use `torch_dtype=`) — confirm the pinned transformers version.
        dtype=torch.float16 if device == "cuda" else torch.float32,
    ).to(device)

    model_cache[model_key] = (tokenizer, model)
    return tokenizer, model


def chat_with_model(message, model_choice):
    """Generate a streaming reply to *message* with the selected model.

    The raw user text is used as the prompt with no chat template and no
    history (deliberate: see the "no memory" note in ``create_demo``).

    Args:
        message: The user's message text.
        model_choice: A key of ``MODEL_NAMES`` selecting which model to run.

    Yields:
        ``[[message, partial_text]]`` — a single-exchange chat history in the
        Gradio tuple format, growing as tokens stream in.
    """
    tokenizer, model = load_model(model_choice)
    device = model.device

    # Absolute zero modification - your text goes straight to the AI
    prompt = message

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=1024,
        # BUG FIX: the original passed temperature=0.0 together with
        # do_sample=True, which transformers rejects (temperature must be a
        # strictly positive float when sampling), so every generation crashed.
        # temperature=0.0 expresses the intent of deterministic output, and
        # the correct way to get that is greedy decoding:
        do_sample=False,
    )

    # generate() blocks, so it runs on a worker thread while we consume the
    # streamer on this one.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        # Returns exactly one exchange: [User message, AI response]
        yield [[message, partial_text]]

    # The streamer is exhausted once generate() finishes; reap the worker.
    thread.join()


def create_demo():
    """Build and return the Gradio Blocks app (theme, chat view, event wiring)."""
    # WhatsApp-inspired "Creeper" Dark Theme
    custom_theme = gr.themes.Soft(
        primary_hue="green",
        neutral_hue="slate",
    ).set(
        body_background_fill="*neutral_950",
        block_background_fill="*neutral_900",
        block_border_width="1px",
        block_label_text_color="*primary_500",
        button_primary_background_fill="*primary_600",
    )

    with gr.Blocks(theme=custom_theme, title="Creeper AI Chatbot") as demo:
        gr.Markdown("# 🌿 Creeper AI Chatbot")

        model_choice = gr.Dropdown(
            label="AI Brain (LFM)",
            choices=list(MODEL_NAMES.keys()),
            value="LFM 1.2B",
        )

        chatbot = gr.Chatbot(
            label="Chat View",
            height=500,
            # NOTE(review): `bubble_full_width` was removed in Gradio 5 —
            # confirm the pinned gradio version, or drop this kwarg.
            bubble_full_width=False,
        )

        with gr.Row():
            msg = gr.Textbox(
                label="Message",
                placeholder="Type here...",
                scale=4,
                show_label=False,
            )
            submit_btn = gr.Button("Send", variant="primary", scale=1)

        clear = gr.Button("Clear Screen")

        # This handles the "No Memory" logic:
        # Every time you hit send, it ignores history and just runs the
        # current message.
        def start_chat(user_message):
            # Clear the textbox and show the pending exchange immediately.
            return "", [[user_message, None]]

        msg.submit(start_chat, [msg], [msg, chatbot]).then(
            chat_with_model, [msg, model_choice], chatbot
        )
        submit_btn.click(start_chat, [msg], [msg, chatbot]).then(
            chat_with_model, [msg, model_choice], chatbot
        )
        clear.click(lambda: None, None, chatbot, queue=False)

    return demo


if __name__ == "__main__":
    demo = create_demo()
    demo.queue()
    demo.launch(server_name="0.0.0.0", server_port=7860)