from langchain_openai import ChatOpenAI
from langchain_community.llms import LlamaCpp
import os
from pathlib import Path
from huggingface_hub import hf_hub_download


class ChatModelLoader:
    def __init__(self, config):
        self.config = config
        self.huggingface_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")

    def _verify_model_cache(self, model_cache_path):
        # Download the GGUF weights into the cache if they are not already
        # present, then return the path to the cached model file.
        hf_hub_download(
            repo_id=self.config["llm_params"]["local_llm_params"]["repo_id"],
            filename=self.config["llm_params"]["local_llm_params"]["filename"],
            cache_dir=model_cache_path,
        )
        # Hugging Face stores downloads under <cache>/<repo>/snapshots/<revision>/.
        return str(list(Path(model_cache_path).glob("*/snapshots/*/*.gguf"))[0])

    def load_chat_model(self):
        llm_loader = self.config["llm_params"]["llm_loader"]
        if llm_loader in [
            "gpt-3.5-turbo-1106",
            "gpt-4",
            "gpt-4o-mini",
        ]:
            # Hosted OpenAI chat model; expects OPENAI_API_KEY in the environment.
            llm = ChatOpenAI(model_name=llm_loader)
        elif llm_loader == "local_llm":
            # Run a local GGUF model through llama.cpp.
            model_path = self._verify_model_cache(
                self.config["llm_params"]["local_llm_params"]["model_path"]
            )
            llm = LlamaCpp(
                model_path=model_path,
                n_batch=512,  # tokens evaluated in parallel per batch
                n_ctx=2048,  # context window size in tokens
                f16_kv=True,  # half-precision key/value cache to save memory
                verbose=True,
                n_threads=2,
                temperature=self.config["llm_params"]["local_llm_params"][
                    "temperature"
                ],
            )
        else:
            raise ValueError(f"Invalid LLM Loader: {llm_loader}")
        return llm
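

# A minimal usage sketch, not part of the original file: the config keys
# mirror those accessed above, while the repo_id, filename, model_path, and
# temperature values are illustrative assumptions only.
if __name__ == "__main__":
    config = {
        "llm_params": {
            "llm_loader": "local_llm",  # or one of the OpenAI model names above
            "local_llm_params": {
                # Hypothetical model choice for illustration; any GGUF
                # repo/file pair on the Hugging Face Hub would work here.
                "repo_id": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
                "filename": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
                "model_path": "model_cache",  # local cache directory
                "temperature": 0.7,
            },
        }
    }
    llm = ChatModelLoader(config).load_chat_model()
    print(llm.invoke("Hello!"))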