"""Download a GGUF Mistral-7B-Instruct model from the Hugging Face Hub and
expose a simple completion helper backed by llama-cpp-python.

Note: importing this module has side effects — it downloads (or reuses the
cached copy of) a ~6 GB model file and loads it into memory.
"""

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# ✅ No custom cache path — let it default
# hf_hub_download is idempotent: it returns the cached local path if the
# file was already fetched.
model_path = hf_hub_download(
    repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
    filename="mistral-7b-instruct-v0.2.Q6_K.gguf",
)

# Load the model
llm = Llama(model_path=model_path)


def lcpp_llm(prompt, max_tokens=128, temperature=0.7, top_p=0.95, top_k=50):
    """Run a single completion against the loaded model.

    Args:
        prompt: The text prompt to complete.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature (higher = more random).
        top_p: Nucleus-sampling probability mass cutoff.
        top_k: Restrict sampling to the k most likely tokens.

    Returns:
        The raw llama-cpp-python completion dict; the generated text is at
        ``output["choices"][0]["text"]``.
    """
    output = llm(
        prompt=prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
    )
    return output