Spaces:

harshvisualz
/

vgecbot

Sleeping

docker deployment

4225666 about 1 month ago

785 Bytes

	from pathlib import Path
	from langchain_community.llms import LlamaCpp
	from langchain_community.chat_models import ChatLlamaCpp

	from libs import MODEL_PATH

	# Full path to the GGUF model file, resolved under MODEL_PATH.
	model_file = Path(MODEL_PATH).joinpath("Qwen2.5-0.5B-Instruct-Q4_K_M.gguf")

	# Disabled alternative: load_model() built on the plain LlamaCpp wrapper.
	# def load_model():
	#     return LlamaCpp(
	#         model_path=str(model_file),
	#         n_ctx=4096,
	#         max_tokens=256,
	#         n_threads=8,
	#         verbose=True
	#     )

	def load_model():
	    """Build the chat model backed by the local GGUF file.

	    Returns:
	        A ``ChatLlamaCpp`` instance constructed from ``model_file`` with the
	        fixed runtime and sampling settings listed below.
	    """
	    # Model location and runtime settings.
	    runtime_options = {
	        "model_path": str(model_file),
	        "n_ctx": 4096,
	        "n_batch": 512,
	        "n_threads": 4,
	        "f16_kv": True,
	        "verbose": False,
	    }
	    # Settings that shape token sampling.
	    sampling_options = {
	        "temperature": 0.05,
	        "top_p": 0.8,
	        "top_k": 20,
	        "repeat_penalty": 1.1,
	    }
	    # ChatLlamaCpp is used directly; no separate base LLM object is wrapped.
	    return ChatLlamaCpp(**runtime_options, **sampling_options)


	# Running this module directly does nothing; the guard body is a no-op.
	if __name__ == "__main__":
	    pass