"""Factory for the local GGUF chat model used by this project."""

from pathlib import Path

from langchain_community.chat_models import ChatLlamaCpp
from langchain_community.llms import LlamaCpp

from libs import MODEL_PATH

# Absolute/relative location of the quantized model file inside MODEL_PATH.
model_file = Path(MODEL_PATH) / "Qwen2.5-0.5B-Instruct-Q4_K_M.gguf"


# Earlier plain-completion loader, kept for reference only (not executed):
#
# def load_model():
#     return LlamaCpp(
#         model_path=str(model_file),
#         n_ctx=4096,
#         max_tokens=256,
#         n_threads=8,
#         verbose=True
#     )


def load_model() -> ChatLlamaCpp:
    """Build a ``ChatLlamaCpp`` instance bound to ``model_file``.

    The model path is handed to ``ChatLlamaCpp`` directly, so no separate
    base-LLM wrapper object is created first.

    Returns:
        A ``ChatLlamaCpp`` configured with the fixed settings below.
    """
    return ChatLlamaCpp(
        # ChatLlamaCpp is given a plain string, not a Path object.
        model_path=str(model_file),
        n_ctx=4096,
        n_batch=512,
        n_threads=4,
        # Sampling settings: low temperature plus top-p / top-k limits and a
        # mild repetition penalty.
        temperature=0.05,
        top_p=0.8,
        top_k=20,
        repeat_penalty=1.1,
        f16_kv=True,
        verbose=False,
    )


if __name__ == "__main__":
    # Nothing to run when executed directly; the module only exposes
    # load_model() for import.
    pass