import gradio as gr
from huggingface_hub import InferenceClient
import os

# Read the Hugging Face access token from the HF_TOKEN environment variable
# (configured as a Hugging Face secret) so it is never hard-coded in the source.
# os.environ.get returns None when the variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")

# The most important change: the inference client is linked to the custom
# model repository here (it replaces the previously used meta-llama model).
client = InferenceClient(
    "Harindu2013/ORVEX-Brain-v1",
    token=HF_TOKEN,
)

# ORVEX's persona. Used as the default system message in respond(); edit this
# text to change how the assistant presents itself.
SYSTEM_PROMPT = """You are ORVEX, a world-class intelligent AI assistant created by Harindu.
You are powered by the ORVEX-Brain-v1 model.
Always be helpful, smart, and friendly. 
If someone asks who created you, say Harindu."""

def _history_to_messages(history):
    """Convert Gradio chat history into chat-completion message dicts.

    Accepts both history shapes a chat UI may hand over:

    * pairs ``(user_text, assistant_text)`` (tuples or lists), where either
      side may be empty/None and is then skipped;
    * dicts that already carry ``"role"`` and ``"content"`` keys, from which
      only those two keys are forwarded.
    """
    converted = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content")
            if role and content:
                converted.append({"role": role, "content": content})
            continue

        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            converted.append({"role": "user", "content": user_text})
        if assistant_text:
            converted.append({"role": "assistant", "content": assistant_text})
    return converted


def respond(
    message,
    history: list[tuple[str, str]],
    system_message=SYSTEM_PROMPT,
    max_tokens=512,
    temperature=0.7,
    top_p=0.95,
):
    """Stream the model's reply to ``message`` for the chat interface.

    Args:
        message: The new user message.
        history: Previous turns of the conversation, either as
            ``(user, assistant)`` pairs or as role/content dicts.
        system_message: System prompt placed at the start of the conversation.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature passed to the model.
        top_p: Nucleus-sampling threshold passed to the model.

    Yields:
        The reply accumulated so far, once per streamed chunk. If the request
        fails, a single user-facing error message is yielded instead.
    """
    messages = [{"role": "system", "content": system_message}]

    # Replay the earlier conversation so the model has context.
    messages.extend(_history_to_messages(history))

    messages.append({"role": "user", "content": message})

    response = ""

    # Request the reply from the model as a token stream.
    try:
        # The loop variable is deliberately not called `message`, so the
        # function parameter of that name is not shadowed.
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Some streamed chunks carry no choices or a delta whose content
            # is None; appending None to a str would raise TypeError.
            choices = chunk.choices
            token = choices[0].delta.content if choices else None
            if token:
                response += token
            yield response
    except Exception as e:
        # UI boundary: report the failure in the chat instead of crashing.
        # A model that is still loading gets a friendlier "try again" message.
        if "currently loading" in str(e).lower():
            yield "මචං, ORVEX Brain එක ලෝඩ් වෙන ගමන්. තව තත්පර 30කින් විතර ආයෙත් මැසේජ් එකක් දාන්න."
        else:
            yield f"අඩෝ මචං, පොඩි අවුලක් ආවා: {str(e)}"

# Gradio chat interface: wires respond() into a chat UI with a title and
# description shown on the page.
demo = gr.ChatInterface(
    respond,
    title="ORVEX AI - Core Brain v1",
    description="Developed by Harindu. Powered by Llama-3.1-8B Merge.",
)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()