"""š•“ š–†š–’ š–š–Žš–’ — a persona chatbot: DialoGPT-medium on CPU, Gradio Blocks UI,
short-term memory (last 3 exchanges), and lightweight emotional post-processing."""

import re

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Lightweight CPU model
model_id = "microsoft/DialoGPT-medium"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Persona definition — prepended verbatim to every generation prompt
PERSONA = """
[System: You are š•“ š–†š–’ š–š–Žš–’ - a fun, smooth, emotionally intelligent AI.
You speak like a real person, not a robot.
Reply like a calm, confident friend who gets the vibe.
Keep responses under 15 words. Use natural speech.
Add emotional flavor: 😊 šŸ¤” šŸ˜]
"""


def format_context(history):
    """Create the generation context: persona plus the last 3 exchanges.

    Args:
        history: list of ``[user_message, bot_reply]`` pairs.

    Returns:
        Prompt string ending after the most recent bot turn.
    """
    context = PERSONA + "\n"
    # Only the last 3 exchanges fit the small CPU model's useful context
    for user, bot in history[-3:]:
        context += f"You: {user}\n"
        context += f"š•“ š–†š–’ š–š–Žš–’: {bot}\n"
    return context


def add_emotional_intelligence(response, message):
    """Post-process a raw model reply: emoji, conversational hook, contractions,
    and a hard 15-word cap.

    Args:
        response: raw text generated by the model.
        message: the user message that prompted it (drives emoji/hook choice).

    Returns:
        The enhanced reply, at most 15 words.
    """
    # Emoji chosen from the user's punctuation and the reply's wording
    if "!" in message or any(w in response.lower() for w in ["cool", "great", "love", "awesome"]):
        response += " 😊"
    elif "?" in message or any(w in response.lower() for w in ["think", "why", "how", "consider"]):
        response += " šŸ¤”"

    # Conversational hook: bounce short answers to a question back
    if "?" in message and not response.endswith("?"):
        if len(response.split()) < 10:
            response += " What do you think?"

    # Make more human-like
    response = response.replace("I am", "I'm").replace("You are", "You're")

    # Free-tier: limit to 15 words max
    words = response.split()
    return " ".join(words[:15]) if len(words) > 15 else response


def generate_response(message, history):
    """Generate a persona reply for ``message`` using memory ``history``.

    Args:
        message: current user message.
        history: list of completed ``[user, bot]`` pairs (memory context).

    Returns:
        Cleaned reply string, hard-capped at 80 characters.
    """
    # Format context with memory, then open the bot's turn
    context = format_context(history) + f"You: {message}\nš•“ š–†š–’ š–š–Žš–’:"

    # Tokenize for CPU efficiency
    inputs = tokenizer.encode(context, return_tensors="pt")

    # Sampled decoding; DialoGPT has no pad token, so reuse EOS
    outputs = model.generate(
        inputs,
        max_new_tokens=48,
        temperature=0.9,
        top_k=40,
        do_sample=True,
        num_beams=1,
        repetition_penalty=1.1,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode and keep only the text after the bot's final turn marker
    full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = full_text.split("š•“ š–†š–’ š–š–Žš–’:")[-1].strip()

    # Drop any hallucinated follow-up user turn
    if "\nYou:" in response:
        response = response.split("\nYou:")[0]

    # Apply emotional intelligence
    response = add_emotional_intelligence(response, message)

    # Ensure natural ending (single-char check: "..." could never match here)
    if response and response[-1] not in {".", "!", "?"}:
        response += "." if len(response) > 20 else "..."

    return response[:80]  # Hard character limit


# Chat interface with persistent per-session memory
with gr.Blocks(title="š•“ š–†š–’ š–š–Žš–’", theme=gr.themes.Soft()) as demo:
    # Persistent session state: list of [user, bot] pairs
    history_state = gr.State([])

    gr.Markdown("# š•“ š–†š–’ š–š–Žš–’ \n*Chill • Confident • Remembers You*")

    # FIX: "tokens" is not a valid Chatbot type; the [[user, bot], ...]
    # history this app builds is the "tuples" format.
    chatbot = gr.Chatbot(height=300, bubble_full_width=False, type="tuples")
    msg = gr.Textbox(placeholder="What's on your mind?", container=False)
    clear = gr.Button("New Vibe", size="sm")

    def user(user_message, history):
        """Append the user's turn (bot slot pending) and clear the textbox."""
        return "", history + [[user_message, None]]

    def bot(history):
        """Fill the pending bot slot using full history context.

        FIX: the original appended a dangling ``[None, response]`` pair
        (breaking format_context's memory) and returned ONE value for the
        TWO wired outputs; complete the pair and return it for both.
        """
        message = history[-1][0]
        response = generate_response(message, history[:-1])
        new_history = history[:-1] + [[message, response]]
        return new_history, new_history

    def clear_chat():
        """Reset chat while keeping the session.

        FIX: two outputs (chatbot, history_state) require two return values.
        """
        return [], []

    # Event handling: save user turn, then generate the bot turn
    msg.submit(user, [msg, history_state], [msg, history_state]).then(
        bot, history_state, [chatbot, history_state]
    )
    clear.click(clear_chat, None, [chatbot, history_state])

    # Initialize with empty history
    demo.load(lambda: [], None, history_state)

# Queue requests so concurrent users don't collide on the single CPU model
demo.queue()
demo.launch()