import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Fetch the quantized GGUF weights from the Hugging Face Hub; the call
# returns the local filesystem path of the requested file.
model_path = hf_hub_download(
    repo_id="mradermacher/Falcon-H1-Tiny-R-90M-GGUF",
    filename="Falcon-H1-Tiny-R-90M.Q2_K.gguf",
)

# Load the model once at import time so every request reuses the same instance.
llm = Llama(
    model_path=model_path,
    n_ctx=512,
    n_threads=2,
)

def chat(
    message: str,
    *,
    max_tokens: int = 50,
    temperature: float = 0.7,
) -> str:
    """Generate a single assistant reply to *message*.

    The message is wrapped in a plain ``User: ... / Assistant:`` prompt and
    passed to the module-level ``llm``. ``"User:"`` is supplied as a stop
    sequence so the reply does not continue into a new user turn.

    Args:
        message: Text entered by the user. Leading and trailing whitespace
            is removed before the prompt is built. ``None``, empty, or
            whitespace-only input yields an empty reply without invoking
            the model.
        max_tokens: Forwarded to the model call as ``max_tokens``.
        temperature: Forwarded to the model call as ``temperature``.

    Returns:
        The text of the first completion choice with surrounding
        whitespace removed, or ``""`` for blank input.
    """
    # Blank input carries nothing to answer; skip the inference call.
    user_text = message.strip() if message else ""
    if not user_text:
        return ""

    response = llm(
        f"User: {user_text}\nAssistant:",
        max_tokens=max_tokens,
        temperature=temperature,
        stop=["User:"],
    )
    return response["choices"][0]["text"].strip()

# Minimal UI: one text input routed through `chat`, one text output.
demo = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
)
demo.launch()