import gradio as gr
from llama_cpp import Llama

# Load GGUF model (runs on CPU)
llm = Llama(
    model_path="quickdraw-tldraw.Q4_K_M.gguf",
    n_ctx=4096,
    n_threads=4,
)

def respond(message, image, history):
    # For now, text-only (vision GGUF is complex)
    response = llm(message, max_tokens=2000, temperature=0.3)
    return response["choices"][0]["text"]

# ... rest of Gradio code
```

**`requirements.txt`:**

```
gradio
llama-cpp-python
```
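The elided `# ... rest of Gradio code` would connect `respond` to a UI. As a rough sketch only (the component names and layout below are assumptions, not the project's actual code), a `gr.Blocks` app could wire a textbox, an image upload, and a chat history to it. `gr.Blocks` is used here because `respond` takes `(message, image, history)`, which doesn't match `gr.ChatInterface`'s `(message, history)` signature:

```
# Hypothetical UI wiring for respond() above; the real app may differ.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Message")
    img = gr.Image(type="filepath", label="Sketch (ignored for now)")

    def chat(message, image, history):
        # Append the (user, bot) turn to the chat history and clear the box
        history = history or []
        reply = respond(message, image, history)
        history.append((message, reply))
        return history, ""

    msg.submit(chat, [msg, img, chatbot], [chatbot, msg])

demo.launch()
```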