"""Download a GGUF Mistral-7B-Instruct model from the Hugging Face Hub and
expose a simple completion helper backed by llama-cpp-python.

Note: importing this module has side effects — it downloads (or reuses the
cached copy of) a ~6 GB model file and loads it into memory.
"""

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# ✅ No custom cache path — let it default
# hf_hub_download is idempotent: it returns the cached local path if the
# file was already fetched.
model_path = hf_hub_download(
    repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
    filename="mistral-7b-instruct-v0.2.Q6_K.gguf",
)

# Load the model
llm = Llama(model_path=model_path)


def lcpp_llm(prompt, max_tokens=128, temperature=0.7, top_p=0.95, top_k=50):
    """Run a single completion against the loaded model.

    Args:
        prompt: The text prompt to complete.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature (higher = more random).
        top_p: Nucleus-sampling probability mass cutoff.
        top_k: Restrict sampling to the k most likely tokens.

    Returns:
        The raw llama-cpp-python completion dict; the generated text is at
        ``output["choices"][0]["text"]``.
    """
    output = llm(
        prompt=prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
    )
    return output