Spaces:
Sleeping
Sleeping
File size: 785 Bytes
from pathlib import Path
from langchain_community.llms import LlamaCpp
from langchain_community.chat_models import ChatLlamaCpp
from libs import MODEL_PATH
# Location of the quantized GGUF weights file inside the MODEL_PATH directory.
model_file = Path(MODEL_PATH, "Qwen2.5-0.5B-Instruct-Q4_K_M.gguf")
# def load_model():
# return LlamaCpp(
# model_path=str(model_file),
# n_ctx=4096,
# max_tokens=256,
# n_threads=8,
# verbose=True
# )
def load_model():
    """Construct the chat model that loads the GGUF file at ``model_file``.

    Returns:
        A ``ChatLlamaCpp`` instance built with the fixed runtime and
        sampling settings defined below.
    """
    runtime_settings = {
        "model_path": str(model_file),  # passed as a plain string path
        "n_ctx": 4096,
        "n_batch": 512,
        "n_threads": 4,
        "f16_kv": True,
        "verbose": False,
    }
    sampling_settings = {
        "temperature": 0.05,
        "top_p": 0.8,
        "top_k": 20,
        "repeat_penalty": 1.1,
    }
    # The chat model is instantiated directly; no base_llm wrapper is used.
    return ChatLlamaCpp(**runtime_settings, **sampling_settings)
# Running this module as a script is intentionally a no-op; it only exposes
# definitions for import.
if __name__ == "__main__":
    pass
|