"""Jarvis chat demo: a Gradio ChatInterface backed by DialoGPT-small on CPU."""

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load model and tokenizer once at startup.
model_name = "microsoft/DialoGPT-small"  # ✅ open-access, no login required
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)
device = "cpu"
model.to(device)
model.eval()


def chat_with_jarvis(message, history):
    """Generate Jarvis's reply to *message*, given the prior *history*.

    With ``type="messages"``, Gradio calls this as ``fn(message, history)``:
    *message* is the latest user string and *history* is a list of
    ``{"role": ..., "content": ...}`` dicts.  (The previous version iterated
    the first argument as if it were the message list, which looped over the
    characters of the user's string and ignored the history.)

    Returns the assistant's reply in the new message-dict format.
    """
    # Rebuild the running transcript from prior turns plus the new message,
    # ending with "Jarvis:" so the model knows to answer as the assistant.
    conversation = ""
    for msg in history:
        role = msg.get("role")
        content = msg.get("content", "")
        if role == "user":
            conversation += f"User: {content}\n"
        elif role == "assistant":
            conversation += f"Jarvis: {content}\n"
    conversation += f"User: {message}\nJarvis:"

    inputs = tokenizer(conversation, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,  # temperature has no effect without sampling
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,  # DialoGPT has no pad token
        )
    # Decode only the newly generated tokens — decoding outputs[0] whole
    # would echo the entire prompt back as the assistant's message.
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)

    # Return only the assistant's latest message (new format)
    return {"role": "assistant", "content": response.strip()}


# --- Gradio Interface ---
demo = gr.ChatInterface(
    fn=chat_with_jarvis,
    title="Jarvis AI (Free CPU Edition)",
    description="A lightweight Jarvis that runs 24/7 on Hugging Face CPU for free.",
    type="messages",  # <-- Required for new Gradio versions
    theme="soft",
)

# Launch normally (Hugging Face handles hosting)
demo.launch()