# test.py
"""Download a quantized Llama 3 GGUF model and run a short prompt.

Fetches the Q4_K_M quantization of Meta-Llama-3-8B from the Hugging Face
Hub (cached under ./models) and generates up to 128 tokens for a fixed
prompt using llama-cpp-python.
"""

from huggingface_hub import hf_hub_download
from llama_cpp import Llama


def main() -> None:
    """Download the model (skipped if already cached), load it, and print a completion."""
    model_path = hf_hub_download(
        repo_id="psuplj/Meta-Llama-3-8B-Q4_K_M-GGUF",
        filename="meta-llama-3-8b.Q4_K_M.gguf",
        local_dir="./models",  # where to store it
    )
    # n_ctx: context window size in tokens; n_threads: CPU threads for inference.
    llm = Llama(model_path=model_path, n_ctx=2048, n_threads=4)
    output = llm("Explain recursion", max_tokens=128)
    # llama-cpp returns an OpenAI-style completion dict; the generated text
    # lives under choices[0]["text"].
    print(output["choices"][0]["text"])


if __name__ == "__main__":
    main()