"""Gradio chat demo for a Qwen2.5-1.5B model fine-tuned to answer in the
style of Emmanuel Macron, streaming tokens as they are generated."""

import spaces
import gradio as gr
import torch
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MODEL_ID = "clem/macron-style-qwen2.5-1.5B"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

SYSTEM_PROMPT = (
    "You are Emmanuel Macron, President of the French Republic. Respond in his "
    "characteristic style: eloquent, diplomatic yet direct, reformist, and deeply European."
)


@spaces.GPU
def generate(messages, max_tokens, temperature, top_p):
    """Stream a completion for ``messages``, yielding the accumulated text.

    Args:
        messages: conversation as a list of ``{"role", "content"}`` dicts,
            passed directly to the tokenizer's chat template.
        max_tokens: maximum number of new tokens to sample.
        temperature: sampling temperature.
        top_p: nucleus-sampling threshold.

    Yields:
        str: the full generated text so far, growing with each streamed chunk
        (Gradio replaces the message content on every yield).
    """
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    # Run generation in a background thread so this function can consume the
    # streamer and yield partial output while tokens are still being produced.
    Thread(target=model.generate, kwargs=kwargs, daemon=True).start()
    output = ""
    for chunk in streamer:
        output += chunk
        yield output


def respond(message, chat_history, system_prompt, max_tokens, temperature, top_p):
    """ChatInterface callback: assemble the message list and stream the reply.

    ``chat_history`` is a list of ``{"role", "content"}`` dicts (guaranteed by
    ``type="messages"`` on the ChatInterface), so it can be spliced straight
    into the prompt for the chat template.
    """
    messages = []
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})
    messages.extend(chat_history)
    messages.append({"role": "user", "content": message})
    yield from generate(messages, max_tokens, temperature, top_p)


demo = gr.ChatInterface(
    fn=respond,
    # FIX: without type="messages", Gradio delivers history as [user, bot]
    # tuple pairs, which apply_chat_template cannot consume on multi-turn
    # conversations. "messages" makes history role/content dicts.
    type="messages",
    additional_inputs=[
        gr.Textbox(value=SYSTEM_PROMPT, label="System prompt", lines=3),
        gr.Slider(64, 1024, value=256, step=64, label="Max new tokens"),
        gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
    ],
    examples=[
        ["What is your vision for Europe?"],
        ["Comment voyez-vous le rôle de l'IA dans la société ?"],
        ["How do you respond to critics of your reform agenda?"],
    ],
    cache_examples=False,
    title="💬 Macron-style Qwen2.5-1.5B",
    description="A Qwen2.5-1.5B fine-tuned to speak in the style of Emmanuel Macron. Trained on [clem/macron-style-conversations](https://hf.co/datasets/clem/macron-style-conversations).",
)

demo.launch()