"""Minimal Gradio chat UI backed by a small GGUF model run through llama.cpp."""

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Hugging Face Hub location of the GGUF weights.
MODEL_REPO_ID = "mradermacher/Falcon-H1-Tiny-R-90M-GGUF"
MODEL_FILENAME = "Falcon-H1-Tiny-R-90M.Q2_K.gguf"

# Model / generation settings.
CONTEXT_SIZE = 512
NUM_THREADS = 2
MAX_NEW_TOKENS = 50
TEMPERATURE = 0.7
# Stop generating as soon as the model starts writing the next user turn.
STOP_SEQUENCES = ["User:"]

# Download model; the returned local path is handed to Llama below.
model_path = hf_hub_download(
    repo_id=MODEL_REPO_ID,
    filename=MODEL_FILENAME,
)

llm = Llama(model_path=model_path, n_ctx=CONTEXT_SIZE, n_threads=NUM_THREADS)


def chat(message: str) -> str:
    """Return the model's reply to a single user message.

    The message is wrapped in a plain ``User: ... / Assistant:`` prompt and
    completed by the module-level ``llm``. Each call is independent; no
    conversation history is kept between calls.

    Args:
        message: Text entered by the user.

    Returns:
        The generated completion with surrounding whitespace stripped, or an
        empty string when ``message`` is empty or whitespace-only.
    """
    # Nothing to answer: skip inference rather than completing a blank prompt.
    if not message or not message.strip():
        return ""

    response = llm(
        f"User: {message}\nAssistant:",
        max_tokens=MAX_NEW_TOKENS,
        temperature=TEMPERATURE,
        stop=STOP_SEQUENCES,
    )
    return response["choices"][0]["text"].strip()


demo = gr.Interface(fn=chat, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.launch()