import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# 📥 Download the GGUF model from the Hugging Face Hub
model_path = hf_hub_download(
    repo_id="RockSky1/Infinity_1.0",
    filename="Infinity_1.0.gguf"
)

print("Loading model...")
llm = Llama(
    model_path=model_path,
    n_ctx=512,      # context window size (tokens)
    n_threads=2,    # CPU threads
    n_batch=128     # prompt batch size
)
print("Model loaded ✅")


def chat_function(message, history):
    """Build an OpenAI-style message list from the Gradio history and query the model."""
    messages = [
        {
            "role": "system",
            "content": "You are Infinity AI 🔥 created by Shivam Kumar (RockSky1) from Bihar, India."
        }
    ]
    # Gradio passes history as (user, assistant) pairs; expand each pair into
    # two chat messages instead of alternating on the pair index, which would
    # drop half of the conversation.
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    try:
        output = llm.create_chat_completion(
            messages=messages,
            max_tokens=128,
            temperature=0.7
        )
        reply = output["choices"][0]["message"]["content"]
    except Exception as e:
        reply = f"Error: {str(e)}"
    return reply


with gr.Blocks() as demo:
    gr.Markdown("# ♾️ Infinity-LLM v1.0 🚀")
    gr.ChatInterface(
        fn=chat_function,
        examples=[
            "Who is Shivam Kumar?",
            "Tell me about Infinity-LLM.",
            "Write a simple Python script."
        ]
    )

demo.launch()
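
# For reference, a minimal sketch (illustrative values only, not real model output) of the
# OpenAI-style message list that chat_function builds for a one-turn history before calling
# llm.create_chat_completion:
#
# [
#     {"role": "system", "content": "You are Infinity AI 🔥 created by Shivam Kumar (RockSky1) from Bihar, India."},
#     {"role": "user", "content": "Hi"},
#     {"role": "assistant", "content": "Hello! How can I help you today?"},
#     {"role": "user", "content": "Who is Shivam Kumar?"},
# ]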