# =============================
# Shay Chatbot — Hugging Face Space
# =============================
import warnings

warnings.filterwarnings("ignore", category=ResourceWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# -----------------------------
# Model Lazy Loading
# -----------------------------
MODEL_ID = "your-username/Shay"  # Hugging Face Hub repo id of the chat model

# Module-level cache so the (expensive) model load happens at most once.
model = None
tokenizer = None


def load_model():
    """Lazily load the tokenizer and model on first use.

    Populates the module-level ``model``/``tokenizer`` globals; subsequent
    calls are no-ops. ``device_map="auto"`` lets accelerate place the model
    on whatever hardware the Space provides.
    """
    global model, tokenizer
    if model is None or tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_ID, use_fast=True, trust_remote_code=True
        )
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            device_map="auto",
            torch_dtype=torch.bfloat16,
            trust_remote_code=True,
            # NOTE(review): dynamic RoPE scaling (factor 10) extends usable
            # context — confirm the base architecture honors this override.
            rope_scaling={"type": "dynamic", "factor": 10.0},
        )


# -----------------------------
# Chat Function
# -----------------------------
def generate_chat(user_message, history, max_tokens, temperature, top_p, top_k):
    """Generate Shay's reply and append the turn to ``history``.

    Parameters
    ----------
    user_message : str
        The new user message.
    history : list[tuple[str, str]] | None
        Prior (user, assistant) turns; ``None`` is treated as empty.
    max_tokens, temperature, top_p, top_k
        Sampling parameters forwarded to ``model.generate``.

    Returns
    -------
    list[tuple[str, str]]
        The updated history, suitable as a ``gr.Chatbot`` value.
    """
    load_model()
    if history is None:
        history = []

    # Rebuild the full conversation in the model's chat template each turn.
    prompt = "<|system|>You are Shay, an intelligent, unbiased, emotionless AI assistant.\n"
    for u, b in history:
        prompt += f"<|user|>{u}<|end|>\n<|assistant|>{b}<|end|>\n"
    prompt += f"<|user|>{user_message}<|end|>\n<|assistant|>"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():  # inference only — no autograd bookkeeping needed
        output = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
            top_k=int(top_k),
            repetition_penalty=1.1,
            do_sample=True,
        )

    # BUG FIX: decode only the newly generated tokens. The previous code
    # decoded the whole sequence with skip_special_tokens=True and then split
    # on "<|assistant|>" — but if the chat-control markers are registered as
    # special tokens they are stripped *before* the split, so the "reply"
    # silently contained the entire prompt. Slicing off the input length is
    # robust either way.
    new_tokens = output[0][inputs["input_ids"].shape[-1]:]
    reply = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    history.append((user_message, reply))
    return history


# -----------------------------
# Utility Functions
# -----------------------------
def copy_last(history):
    """Return the assistant's most recent reply, or "" if history is empty."""
    return history[-1][1] if history else ""


def copy_all(history):
    """Render the full transcript as plain text, one "User:/Shay:" pair per turn."""
    return "\n".join(f"User: {u}\nShay: {b}" for u, b in history) if history else ""


# -----------------------------
# Dark Theme CSS
# -----------------------------
DARK_CSS = """
body { background-color: #111; color: #eee; }
.gradio-container { background-color: #111; max-width: 100% !important; }
.gradio-chatbot { background-color: #222; color: #eee; }
input, textarea { background-color: #222; color: #eee; border: 1px solid #555; }
button { background-color: #333; color: #eee; border: 1px solid #555; }
"""

# -----------------------------
# Gradio UI
# -----------------------------
# BUG FIX: custom CSS must be given to gr.Blocks(css=...). The original code
# passed css= to app.launch(), which does not accept it — the dark theme was
# never applied (and launch() raises TypeError on current Gradio versions).
with gr.Blocks(css=DARK_CSS) as app:
    gr.Markdown(
        "## Shay — Ultra Reliable AI Assistant\n"
        "Unbiased, emotionless, and able to converse on any topic.\n\n"
        "**Adjust generation parameters below:**"
    )

    chatbot = gr.Chatbot(height=600)
    user_input = gr.Textbox(placeholder="Type your message here...", container=False, scale=8)
    send_btn = gr.Button("Send", variant="primary", scale=2)
    clear_btn = gr.Button("Clear", variant="secondary")
    copy_last_btn = gr.Button("Copy Last Message", variant="secondary")
    copy_all_btn = gr.Button("Copy Full History", variant="secondary")

    # Sliders for generation parameters
    max_tokens_slider = gr.Slider(32, 1024, value=256, step=32, label="Max New Tokens")
    temperature_slider = gr.Slider(0.1, 1.5, value=0.7, step=0.05, label="Temperature")
    top_p_slider = gr.Slider(0.1, 1.0, value=0.9, step=0.01, label="Top-p")
    top_k_slider = gr.Slider(1, 200, value=50, step=1, label="Top-k")

    gen_inputs = [
        user_input,
        chatbot,
        max_tokens_slider,
        temperature_slider,
        top_p_slider,
        top_k_slider,
    ]

    # Chat actions. FIX: chain a .then(...) to clear the textbox after each
    # send — the original left the submitted message sitting in the input box.
    user_input.submit(generate_chat, gen_inputs, chatbot).then(
        lambda: "", None, user_input, queue=False
    )
    send_btn.click(generate_chat, gen_inputs, chatbot).then(
        lambda: "", None, user_input, queue=False
    )
    clear_btn.click(lambda: None, None, chatbot, queue=False)

    # NOTE(review): these route the copied text to no output component, so the
    # text goes nowhere visible — presumably a clipboard integration was
    # intended; wiring them to a Textbox would need a layout change, so left
    # as-is for review.
    copy_last_btn.click(copy_last, chatbot, None)
    copy_all_btn.click(copy_all, chatbot, None)

# Queue to handle multiple concurrent users without blocking the event loop.
app.queue(max_size=64)

# -----------------------------
# Launch App (Single Launch)
# -----------------------------
app.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)