"""Minimal Gradio chat UI over a local GGUF model served through KoboldCpp.

Downloads a quantized TinyLlama GGUF from the Hugging Face Hub, loads it,
and exposes it via ``gr.ChatInterface``.
"""

import gradio as gr
from huggingface_hub import hf_hub_download

# NOTE(review): koboldcpp is usually run as a standalone server binary; verify
# that the installed package actually exposes a `KoboldCpp` class with this
# constructor/`generate` API — TODO confirm against the project's koboldcpp version.
from koboldcpp import KoboldCpp

# Quantized TinyLlama chat model hosted on the Hugging Face Hub.
REPO_ID = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"

# hf_hub_download caches the file locally and returns its filesystem path,
# so repeated runs do not re-download the model.
model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

# Load the GGUF model; context_length bounds prompt + completion tokens.
llm = KoboldCpp(
    model_path=model_path,
    context_length=2048,
    threads=4,
)


def _build_prompt(message, history):
    """Flatten prior turns plus the new user message into a single prompt string.

    ``history`` may arrive as ``(user, assistant)`` tuples or as
    ``{"role": ..., "content": ...}`` dicts depending on the Gradio version;
    both shapes are handled.

    NOTE(review): TinyLlama-Chat was trained with the Zephyr chat template
    (``<|user|>`` / ``<|assistant|>`` markers). Plain "User:/Assistant:" turns
    work, but applying the proper template would likely improve output
    quality — TODO confirm whether KoboldCpp applies a template itself.
    """
    parts = []
    for turn in history:
        if isinstance(turn, dict):
            parts.append(f"{turn['role'].capitalize()}: {turn['content']}")
        else:
            user_msg, bot_msg = turn
            parts.append(f"User: {user_msg}")
            if bot_msg:
                parts.append(f"Assistant: {bot_msg}")
    parts.append(f"User: {message}")
    parts.append("Assistant:")
    return "\n".join(parts)


def chat_fn(message, history):
    """Generate one assistant reply for ``gr.ChatInterface``.

    The original implementation dropped ``history``, so every turn was
    answered without conversational context; the prompt now includes the
    prior turns.

    Args:
        message: The latest user message.
        history: Prior conversation turns as supplied by Gradio.

    Returns:
        The model's completion as a string.
    """
    response = llm.generate(
        prompt=_build_prompt(message, history),
        max_length=256,
        temp=0.7,
        top_p=0.95,
    )
    return response


demo = gr.ChatInterface(
    fn=chat_fn,
    title="GGUF via KoboldCpp ⚡",
)

# Guard the launch so importing this module (e.g. for testing) does not
# start the web server.
if __name__ == "__main__":
    demo.launch()