Spaces:
Sleeping
Sleeping
from langchain_community.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
# Path to the local Llama 2 7B chat model weights in GGUF format
# (Q4_K_M quantization). Expected to sit in the working directory.
model_path = 'llama-2-7b-chat.Q4_K_M.gguf'
class Loadllm:
    """Factory for a local Llama 2 chat model served through llama.cpp."""

    @staticmethod
    def load_llm():
        """Build and return a streaming ``LlamaCpp`` LLM instance.

        Returns:
            LlamaCpp: model configured to offload layers to the GPU and
            stream generated tokens to stdout as they are produced.
        """
        # Stream tokens to stdout while the model generates.
        callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
        # Prepare the LLM.
        llm = LlamaCpp(
            model_path=model_path,  # module-level GGUF path defined above
            n_gpu_layers=40,        # number of layers offloaded to the GPU
            n_batch=512,            # prompt tokens processed per batch
            n_ctx=1024,             # context window size in tokens
            f16_kv=True,  # MUST be True, otherwise you run into problems after a couple of calls
            callback_manager=callback_manager,
            verbose=True,
        )
        return llm