"""Minimal Gradio chat UI over a local GGUF model served through KoboldCpp.

Downloads a quantized TinyLlama GGUF from the Hugging Face Hub, loads it,
and exposes it via ``gr.ChatInterface``.
"""

import gradio as gr
from huggingface_hub import hf_hub_download

# NOTE(review): koboldcpp is usually run as a standalone server binary; verify
# that the installed package actually exposes a `KoboldCpp` class with this
# constructor/`generate` API — TODO confirm against the project's koboldcpp version.
from koboldcpp import KoboldCpp

# Quantized TinyLlama chat model hosted on the Hugging Face Hub.
REPO_ID = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"

# hf_hub_download caches the file locally and returns its filesystem path,
# so repeated runs do not re-download the model.
model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

# Load the GGUF model; context_length bounds prompt + completion tokens.
llm = KoboldCpp(
    model_path=model_path,
    context_length=2048,
    threads=4,
)


def _build_prompt(message, history):
    """Flatten prior turns plus the new user message into a single prompt string.

    ``history`` may arrive as ``(user, assistant)`` tuples or as
    ``{"role": ..., "content": ...}`` dicts depending on the Gradio version;
    both shapes are handled.

    NOTE(review): TinyLlama-Chat was trained with the Zephyr chat template
    (``<|user|>`` / ``<|assistant|>`` markers). Plain "User:/Assistant:" turns
    work, but applying the proper template would likely improve output
    quality — TODO confirm whether KoboldCpp applies a template itself.
    """
    parts = []
    for turn in history:
        if isinstance(turn, dict):
            parts.append(f"{turn['role'].capitalize()}: {turn['content']}")
        else:
            user_msg, bot_msg = turn
            parts.append(f"User: {user_msg}")
            if bot_msg:
                parts.append(f"Assistant: {bot_msg}")
    parts.append(f"User: {message}")
    parts.append("Assistant:")
    return "\n".join(parts)


def chat_fn(message, history):
    """Generate one assistant reply for ``gr.ChatInterface``.

    The original implementation dropped ``history``, so every turn was
    answered without conversational context; the prompt now includes the
    prior turns.

    Args:
        message: The latest user message.
        history: Prior conversation turns as supplied by Gradio.

    Returns:
        The model's completion as a string.
    """
    response = llm.generate(
        prompt=_build_prompt(message, history),
        max_length=256,
        temp=0.7,
        top_p=0.95,
    )
    return response


demo = gr.ChatInterface(
    fn=chat_fn,
    title="GGUF via KoboldCpp ⚡",
)

# Guard the launch so importing this module (e.g. for testing) does not
# start the web server.
if __name__ == "__main__":
    demo.launch()