# test.py
"""Download a quantized Llama 3 GGUF model and run a short prompt.

Fetches the Q4_K_M quantization of Meta-Llama-3-8B from the Hugging Face
Hub (cached under ./models) and generates up to 128 tokens for a fixed
prompt using llama-cpp-python.
"""

from huggingface_hub import hf_hub_download
from llama_cpp import Llama


def main() -> None:
    """Download the model (skipped if already cached), load it, and print a completion."""
    model_path = hf_hub_download(
        repo_id="psuplj/Meta-Llama-3-8B-Q4_K_M-GGUF",
        filename="meta-llama-3-8b.Q4_K_M.gguf",
        local_dir="./models",  # where to store it
    )
    # n_ctx: context window size in tokens; n_threads: CPU threads for inference.
    llm = Llama(model_path=model_path, n_ctx=2048, n_threads=4)
    output = llm("Explain recursion", max_tokens=128)
    # llama-cpp returns an OpenAI-style completion dict; the generated text
    # lives under choices[0]["text"].
    print(output["choices"][0]["text"])


if __name__ == "__main__":
    main()