# vgecbot/old/services/Models.py
# harsh-dev - docker deployment - 4225666
from pathlib import Path
from langchain_community.llms import LlamaCpp
from langchain_community.chat_models import ChatLlamaCpp
from libs import MODEL_PATH
# Full path to the GGUF model file, built by joining the file name onto MODEL_PATH.
model_file = Path(MODEL_PATH).joinpath("Qwen2.5-0.5B-Instruct-Q4_K_M.gguf")
# Alternative loader (disabled): builds the plain LlamaCpp wrapper instead of ChatLlamaCpp.
# def load_model():
#     return LlamaCpp(
#         model_path=str(model_file),
#         n_ctx=4096,
#         max_tokens=256,
#         n_threads=8,
#         verbose=True
#     )
def load_model():
    """Construct the chat model from the local GGUF file.

    Each call builds a new ``ChatLlamaCpp`` instance pointed at
    ``model_file``; nothing is cached by this function.

    Returns:
        A ``ChatLlamaCpp`` object configured with the settings below.
    """
    # The model path is handed to ChatLlamaCpp directly as a string; no
    # separate base LLM object is wrapped.
    gguf_path = str(model_file)

    llama_settings = {
        "model_path": gguf_path,
        # Runtime settings.
        "n_ctx": 4096,
        "n_batch": 512,
        "n_threads": 4,
        # Sampling settings.
        "temperature": 0.05,
        "top_p": 0.8,
        "top_k": 20,
        "repeat_penalty": 1.1,
        # Miscellaneous flags.
        "f16_kv": True,
        "verbose": False,
    }
    return ChatLlamaCpp(**llama_settings)
# Executing this file directly is intentionally a no-op.
if __name__ == "__main__":
    pass