llm-mdcl-backend / your_rag_module.py
kjdeka's picture
Upload folder using huggingface_hub
cca5718 verified
raw
history blame contribute delete
556 Bytes
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# ✅ No custom cache path — let it default
# Download (or reuse from the default HF cache) the quantized Mistral-7B
# Instruct v0.2 model in GGUF format; returns the local file path.
model_path = hf_hub_download(
repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
filename="mistral-7b-instruct-v0.2.Q6_K.gguf"
)
# Load the model
# NOTE(review): Llama() is called with defaults only — context size, GPU
# layers, etc. all take llama-cpp-python's defaults (n_ctx defaults to a
# small window); confirm that is intended for RAG-sized prompts.
llm = Llama(model_path=model_path)
def lcpp_llm(prompt, max_tokens=128, temperature=0.7, top_p=0.95, top_k=50):
    """Run a single completion on the module-level ``llm`` model.

    Forwards the prompt and sampling parameters straight to the
    llama-cpp-python callable and returns its raw response dict unchanged.
    """
    sampling_params = {
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": top_k,
    }
    return llm(prompt=prompt, **sampling_params)