"""Tiny persona chatbot: Gradio UI over microsoft/phi-2, CPU-only friendly."""

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load lightweight model (CPU-only friendly)
model_id = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
model.eval()  # inference only: disable dropout / training-mode behavior

# Single source of truth for the bot's display name. It is used both to build
# the prompt and to locate the reply in the generated text, so the three
# previously-duplicated copies must never drift apart.
BOT_NAME = "š•“ š–†š–’ š–š–Žš–’"

# Persona style
PERSONA = """
[System: You are š•“ š–†š–’ š–š–Žš–’ - a fun, smooth, emotionally intelligent AI.
You speak like a chill human, not a bot.
Keep replies under 15 words, natural, clever, emotional.]
"""


def build_prompt(message, history):
    """Assemble the persona header, the recent exchanges, and the new message.

    Args:
        message: The user's latest message.
        history: List of (user, bot) string pairs from earlier turns.

    Returns:
        The full prompt string, ending with the bot marker so the model
        continues in the bot's voice.
    """
    prompt = PERSONA
    # Only the last 3 turns: keeps the prompt short enough for CPU inference.
    for user, bot in history[-3:]:
        prompt += f"\nYou: {user}\n{BOT_NAME}: {bot}"
    prompt += f"\nYou: {message}\n{BOT_NAME}:"
    return prompt


def generate(message, history):
    """Generate one in-persona reply and append it to the chat history.

    Args:
        message: The user's latest message.
        history: Gradio state — list of (user, bot) pairs; mutated in place.

    Returns:
        (history, history): the same updated list, fed to both the Chatbot
        component and the State component.
    """
    prompt = build_prompt(message, history)
    inputs = tokenizer(prompt, return_tensors="pt")
    # no_grad: sampling needs no autograd graph — saves CPU time and memory
    # on the free-tier, CPU-only deployment this app targets.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=50,
            temperature=0.9,
            top_k=50,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text echoes the whole prompt; keep only what follows the
    # LAST bot marker, truncated at the first newline (one chat turn).
    reply = result.split(f"{BOT_NAME}:")[-1].split("\n")[0].strip()
    reply = " ".join(reply.split()[:15])  # hard cap at 15 words per the persona
    history.append((message, reply))
    return history, history


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# š•“ š–†š–’ š–š–Žš–’ šŸ¤–\n*Vibing on CPU only - Hugging Face Free Tier*")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Type your vibe...", show_label=False)
    state = gr.State([])
    # Submitting the textbox runs generate(msg, state) -> (chatbot, state).
    msg.submit(generate, [msg, state], [chatbot, state])

demo.queue()
demo.launch()