Spaces:
Sleeping
Sleeping
from langchain_community.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
# Path to the local Llama 2 7B chat model weights in GGUF format
# (Q4_K_M quantization). Expected to sit in the working directory.
model_path = 'llama-2-7b-chat.Q4_K_M.gguf'
class Loadllm:
    """Factory for a local Llama 2 chat model served through llama.cpp."""

    @staticmethod
    def load_llm():
        """Build and return a streaming ``LlamaCpp`` LLM instance.

        Returns:
            LlamaCpp: model configured to offload layers to the GPU and
            stream generated tokens to stdout as they are produced.
        """
        # Stream tokens to stdout while the model generates.
        callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
        # Prepare the LLM.
        llm = LlamaCpp(
            model_path=model_path,  # module-level GGUF path defined above
            n_gpu_layers=40,        # number of layers offloaded to the GPU
            n_batch=512,            # prompt tokens processed per batch
            n_ctx=1024,             # context window size in tokens
            f16_kv=True,  # MUST be True, otherwise you run into problems after a couple of calls
            callback_manager=callback_manager,
            verbose=True,
        )
        return llm