Spaces:

seanpedrickcase
/

Light-PDF-Web-QA-Chatbot

Running

App Files Files Community

Light-PDF-Web-QA-Chatbot / tools /model_load.py

seanpedrickcase

Changed embedding model to mixed bread xsmall, optimised related functions in general. Updated Gemini model references.

d3a9db3 4 months ago

raw

history blame

2.22 kB

	import torch

	# Choose the torch device once, at import time. gpu_layers (used below as the
	# default n_gpu_layers of CtransInitConfig_gpu) is 100 when CUDA is available
	# and 0 otherwise.
	# NOTE(review): an earlier comment here said gpu_layers was kept at 0 even with
	# CUDA because of bugs in the CUDA path, but the code sets 100 - confirm which
	# is intended.
	torch_device, gpu_layers = (
	    ("cuda", 100) if torch.cuda.is_available() else ("cpu", 0)
	)
	print("Running on device:", torch_device)

	# Fixed thread count; torch.get_num_threads() is the dynamic alternative.
	threads: int = 8
	print("CPU threads:", threads)

	# Qwen 2 0.5B (small, fast) model parameters
	temperature: float = 0.1
	top_k: int = 3
	top_p: float = 1
	repetition_penalty: float = 1.15
	#flan_alpaca_repetition_penalty: float = 1.3
	last_n_tokens: int = 64
	sample = True
	seed: int = 42

	max_new_tokens: int = 1024
	stream: bool = True
	reset: bool = False

	batch_size: int = 128
	context_length: int = 4096

	# Bedrock parameters
	max_tokens = 4096


	class CtransInitConfig_gpu:
	    """Plain holder for model-initialisation settings.

	    Every constructor argument defaults to the matching module-level
	    constant. Python evaluates those defaults once, when the class is
	    defined, so later changes to the module constants do not affect new
	    instances.

	    NOTE(review): the attribute names look like llama.cpp-style loader
	    keyword arguments - confirm against the code that consumes this object.

	    Args:
	        last_n_tokens: Stored as ``self.last_n_tokens``.
	        seed: Stored as ``self.seed``.
	        n_threads: Stored as ``self.n_threads``.
	        n_batch: Stored as ``self.n_batch``.
	        n_ctx: Stored as ``self.n_ctx``. Defaults to ``context_length``.
	        n_gpu_layers: Stored as ``self.n_gpu_layers``.
	    """

	    def __init__(self,
	                 last_n_tokens=last_n_tokens,
	                 seed=seed,
	                 n_threads=threads,
	                 n_batch=batch_size,
	                 # Was bound to the Bedrock ``max_tokens`` constant; the
	                 # model's own ``context_length`` has the same value (4096)
	                 # and keeps this default independent of Bedrock settings.
	                 n_ctx=context_length,
	                 n_gpu_layers=gpu_layers):
	        self.last_n_tokens = last_n_tokens
	        self.seed = seed
	        self.n_threads = n_threads
	        self.n_batch = n_batch
	        self.n_ctx = n_ctx
	        self.n_gpu_layers = n_gpu_layers

	    def update_gpu(self, new_value):
	        """Replace ``n_gpu_layers`` with ``new_value``."""
	        self.n_gpu_layers = new_value

	class CtransInitConfig_cpu(CtransInitConfig_gpu):
	    """CPU variant of CtransInitConfig_gpu: same defaults, n_gpu_layers is 0."""

	    def __init__(self):
	        # Every other setting keeps the parent's default value.
	        super().__init__(n_gpu_layers=0)

	# Module-level default instances, created once at import time.
	# cpu_config always has n_gpu_layers == 0; gpu_config uses the class defaults.
	cpu_config = CtransInitConfig_cpu()
	gpu_config = CtransInitConfig_gpu()


	class CtransGenGenerationConfig:
	    """Plain holder for text-generation settings.

	    Each argument defaults to a module-level constant and is stored
	    unchanged on the instance under the argument's own name. The defaults
	    are captured when the class is defined.

	    Note that the ``max_tokens`` argument defaults to the module's
	    ``max_new_tokens`` constant, not to the module-level ``max_tokens``.
	    """

	    def __init__(
	        self,
	        temperature=temperature,
	        top_k=top_k,
	        top_p=top_p,
	        repeat_penalty=repetition_penalty,
	        seed=seed,
	        stream=stream,
	        max_tokens=max_new_tokens,
	    ):
	        self.temperature, self.top_k, self.top_p = temperature, top_k, top_p
	        self.repeat_penalty, self.seed = repeat_penalty, seed
	        self.max_tokens, self.stream = max_tokens, stream

	    def update_temp(self, new_value):
	        """Replace ``temperature`` with ``new_value``."""
	        self.temperature = new_value