"""Gradio chat UI for the Reasonyx GGUF model, served locally via llama-cpp."""

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

MODEL_REPO = "synthdragon-studios/reasonyx"
MODEL_FILE = "reasonyx.gguf"

# Runtime system prompt — kept byte-identical to the original string.
SYSTEM_PROMPT = "You are Reasonyx, an AI assistant; say it in the same language the user uses.\n\n"

# Fetch the GGUF weights (hf_hub_download caches locally, so repeated runs
# reuse the downloaded file) and load them into a llama-cpp context.
model_path = hf_hub_download(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE,
    repo_type="model",
)
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
)


def _history_pairs(history):
    """Yield (user, assistant) turn pairs from either Gradio history format.

    Gradio < 5 (and ``type="tuples"``) passes a list of ``[user, bot]`` pairs;
    Gradio 5's default ``type="messages"`` passes a flat list of
    ``{"role": ..., "content": ...}`` dicts. The original tuple unpacking
    crashed on the dict format — this adapter accepts both.
    """
    if history and isinstance(history[0], dict):
        pending_user = None
        for msg in history:
            role = msg.get("role")
            if role == "user":
                pending_user = msg.get("content", "")
            elif role == "assistant" and pending_user is not None:
                yield pending_user, msg.get("content", "")
                pending_user = None
    else:
        for user, bot in history:
            yield user, bot


def chat(message, history):
    """Build a plain-text transcript prompt and return the model's reply.

    Parameters
    ----------
    message : str
        The new user message.
    history : list
        Prior turns in either Gradio history format (see ``_history_pairs``).

    Returns
    -------
    str
        The assistant's reply, whitespace-stripped.
    """
    prompt = SYSTEM_PROMPT
    for user, bot in _history_pairs(history):
        prompt += f"User: {user}\nAssistant: {bot}\n"
    prompt += f"User: {message}\nAssistant:"
    # Stop at the next "User:" marker so the model does not hallucinate
    # the user's side of the conversation.
    output = llm(
        prompt=prompt,
        max_tokens=256,
        stop=["User:"],
    )
    return output["choices"][0]["text"].strip()


gr.ChatInterface(chat, title="Reasonyx").launch()