Spaces:
Sleeping
Sleeping
| from pathlib import Path | |
| from langchain_community.llms import LlamaCpp | |
| from langchain_community.chat_models import ChatLlamaCpp | |
| from libs import MODEL_PATH | |
# Location of the quantized GGUF weights file inside the MODEL_PATH directory.
model_file = Path(MODEL_PATH).joinpath("Qwen2.5-0.5B-Instruct-Q4_K_M.gguf")
| # def load_model(): | |
| # return LlamaCpp( | |
| # model_path=str(model_file), | |
| # n_ctx=4096, | |
| # max_tokens=256, | |
| # n_threads=8, | |
| # verbose=True | |
| # ) | |
def load_model():
    """Create the chat model backed by the local GGUF file.

    Returns:
        A ``ChatLlamaCpp`` instance pointed at ``model_file`` and built
        with the fixed settings listed below. Each call constructs a new
        instance.
    """
    settings = {
        # The path is handed over as a plain string rather than a Path.
        "model_path": str(model_file),
        "n_ctx": 4096,
        "n_batch": 512,
        "n_threads": 4,
        # Sampling-related values.
        "temperature": 0.05,
        "top_p": 0.8,
        "top_k": 20,
        "repeat_penalty": 1.1,
        "f16_kv": True,
        "verbose": False,
    }
    # The chat class is instantiated directly; no base LLM wrapper is used.
    return ChatLlamaCpp(**settings)
# Script entry point: running this file directly performs no action.
if __name__ == "__main__":
    pass