"""Gradio chat demo for microsoft/phi-3-mini-4k-instruct.

Runs on Hugging Face Spaces (ZeroGPU) or locally; the ``spaces.GPU``
decorator degrades to a no-op identity decorator outside a Space.
"""

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Safe import of the ZeroGPU decorator: outside a HF Space the `spaces`
# package is absent, so fall back to a pass-through decorator.
try:
    from spaces import GPU
except ImportError:
    def GPU(func):
        return func

# Load Phi-3 Mini once at startup (model download/placement is I/O-heavy).
model_id = "microsoft/phi-3-mini-4k-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="auto",
    device_map="auto",
)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)


@GPU  # ensures ZeroGPU allocates a GPU for the duration of each call
def chat_fn(message, history):
    """Generate an assistant reply for *message* given the chat *history*.

    Parameters
    ----------
    message : str
        The user's latest input.
    history : list[dict]
        Gradio "messages"-style history: dicts with "role" and "content"
        keys (roles other than user/assistant are ignored).

    Returns
    -------
    str
        The assistant reply; replies that look like bare code are wrapped
        in a Markdown code fence so Gradio renders them monospaced.
    """
    # Rebuild the conversation in Phi-3's chat format. Each turn must be
    # terminated with <|end|> per the model card's prompt template.
    parts = []
    for item in history:
        role = item.get("role")
        if role in ("user", "assistant"):
            parts.append(f"<|{role}|>\n{item['content']}<|end|>\n")
    parts.append(f"<|user|>\n{message}<|end|>\n<|assistant|>\n")
    prompt = "".join(parts)

    # return_full_text=False yields only the newly generated text, which
    # is more robust than splitting the echoed prompt on "<|assistant|>".
    result = pipe(
        prompt,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,
    )[0]["generated_text"]
    # The model may emit its end-of-turn marker; keep only the text
    # before it so the marker never leaks into the UI.
    reply = result.split("<|end|>")[0].strip()

    # Heuristic auto-format: wrap unfenced code-looking replies in a
    # Markdown code block.
    keywords = [
        "def ", "class ", "import ",
        "function ", "console.log", "public static void",
    ]
    if "```" not in reply and any(k in reply for k in keywords):
        reply = f"```\n{reply}\n```"
    return reply


# Gradio UI: a soft-themed chat interface with a few example prompts.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 💬 Chat with Phi-3 Mini")
    gr.Markdown(
        "Welcome to your AI Assistant powered by Phi-3 Mini. "
        "Ask me anything or request code examples!"
    )
    gr.ChatInterface(
        fn=chat_fn,
        title="",
        examples=[
            "What is Python?",
            "Write a JavaScript function to reverse a string.",
            "Explain how transformers work.",
        ],
        # "messages" type matches the role/content dicts chat_fn expects.
        chatbot=gr.Chatbot(type="messages"),
    )

demo.launch()