import gradio as gr
from llama_cpp import Llama

# Load GGUF model (runs on CPU)
llm = Llama(
    model_path="quickdraw-tldraw.Q4_K_M.gguf",
    n_ctx=4096,
    n_threads=4,
)

def respond(message, image, history):
    # For now, text-only (vision GGUF is complex)
    response = llm(message, max_tokens=2000, temperature=0.3)
    return response["choices"][0]["text"]

# ... rest of Gradio code
```

**`requirements.txt`:**

```
gradio
llama-cpp-python
```
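The elided `# ... rest of Gradio code` would connect `respond` to a UI. As a rough sketch only (the component names and layout below are assumptions, not the project's actual code), a `gr.Blocks` app could wire a textbox, an image upload, and a chat history to it. `gr.Blocks` is used here because `respond` takes `(message, image, history)`, which doesn't match `gr.ChatInterface`'s `(message, history)` signature:

```
# Hypothetical UI wiring for respond() above; the real app may differ.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Message")
    img = gr.Image(type="filepath", label="Sketch (ignored for now)")

    def chat(message, image, history):
        # Append the (user, bot) turn to the chat history and clear the box
        history = history or []
        reply = respond(message, image, history)
        history.append((message, reply))
        return history, ""

    msg.submit(chat, [msg, img, chatbot], [chatbot, msg])

demo.launch()
```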