import logging

import gradio as gr
import requests

# HTTP endpoint that chat() posts completion requests to.
API_URL = "http://localhost:7860/completion"

# Persona preamble placed at the start of every prompt sent to the backend.
# Written as adjacent literals; the value is two lines joined by one newline.
SYSTEM_PROMPT = (
    "You are Roy – an AI assistant created and fine-tuned by Souvik Pramanick.\n"
    "Be friendly, concise and helpful. Never mention internal tokens like [/INST]."
)

def _history_to_transcript(history):
    """Render earlier chat turns as ``User: ...`` / ``Roy: ...`` lines.

    Two history layouts are accepted:

    * a sequence of ``(user, bot)`` pairs, and
    * a sequence of ``{"role": ..., "content": ...}`` dicts, where the role
      ``"user"`` maps to ``User`` and ``"assistant"`` maps to ``Roy``.

    Turns whose text is missing, empty, or not a string are skipped so the
    prompt never contains a literal ``None`` or the repr of a non-text item.

    Returns:
        The transcript as one string, each turn terminated by a newline
        (empty string when there is no usable history).
    """
    speakers = {"user": "User", "assistant": "Roy"}
    lines = []
    for entry in history or ():
        if isinstance(entry, dict):
            turns = [(speakers.get(entry.get("role")), entry.get("content"))]
        else:
            user, bot = entry
            turns = [("User", user), ("Roy", bot)]
        lines.extend(
            f"{who}: {text}\n"
            for who, text in turns
            if who is not None and isinstance(text, str) and text
        )
    return "".join(lines)


def chat(message, history):
    """Return Roy's reply to *message*, given the earlier conversation.

    Builds a plain-text prompt (system prompt, prior turns, the new user
    message, then an open ``Roy:`` line) and posts it to ``API_URL``.

    Args:
        message: The user's latest message.
        history: Earlier turns, either ``(user, bot)`` pairs or
            ``{"role", "content"}`` dicts (see ``_history_to_transcript``).

    Returns:
        The backend's ``content`` field with ``[/INST]`` markers removed and
        surrounding whitespace stripped, or a fixed "backend busy" notice
        when the request fails, returns an HTTP error status, or does not
        return valid JSON. A response without a string ``content`` yields
        an empty string.
    """
    prompt = (
        f"{SYSTEM_PROMPT}\n\n"
        f"{_history_to_transcript(history)}"
        f"User: {message}\nRoy:"
    )

    payload = {
        "prompt": prompt,
        "n_predict": 120,
        "temperature": 0.7,
        # Stop before the model starts writing the user's next turn.
        "stop": ["User:"],
    }

    try:
        r = requests.post(API_URL, json=payload, timeout=300)
        # Treat 4xx/5xx as failures instead of parsing an error body as a reply.
        r.raise_for_status()
        data = r.json()
    except (requests.RequestException, ValueError):
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException.
        logging.getLogger(__name__).exception(
            "Completion request to %s failed", API_URL
        )
        return "⚠ Backend busy – please wait a few seconds and try again."

    # Guard against a non-object payload or a null/non-string "content".
    content = data.get("content") if isinstance(data, dict) else None
    text = content if isinstance(content, str) else ""

    # Clean unwanted artifacts
    return text.replace("[/INST]", "").strip()


# Chat UI wired to chat(); the examples are offered as ready-made prompts.
demo = gr.ChatInterface(
    chat,
    examples=["Who created you?", "Explain AI in 3 lines", "Give Python roadmap"],
    description="Personal AI created by Souvik Pramanick",
    title="Roy AI",
)

# Serve the UI on port 7861, bound to 0.0.0.0 (all network interfaces).
demo.launch(
    server_port=7861,
    server_name="0.0.0.0",
)