Changed embedding model to mixed bread xsmall, optimised related functions in general. Updated Gemini model references.
d3a9db3
| import torch | |
# Pick the compute device and how many model layers to offload to the GPU.
# NOTE(review): an earlier comment here said gpu_layers was kept at 0 even
# with CUDA because of persistent bugs in the CUDA implementation, but the
# code offloads 100 layers whenever CUDA is available - confirm which is
# intended.
torch_device, gpu_layers = (
    ("cuda", 100) if torch.cuda.is_available() else ("cpu", 0)
)
print("Running on device:", torch_device)

# Fixed CPU thread count; torch.get_num_threads() is the alternative that
# was previously considered here.
threads = 8
print("CPU threads:", threads)
# Qwen 2 0.5B (small, fast) model parameters.
# These module-level values are used as the defaults of the config classes
# defined in this file.

# Sampling settings
temperature: float = 0.1
top_k: int = 3
top_p: float = 1
repetition_penalty: float = 1.15
# flan_alpaca_repetition_penalty: float = 1.3
last_n_tokens: int = 64
max_new_tokens: int = 1024
seed: int = 42

# Runtime / session settings
reset: bool = False
stream: bool = True
threads: int = threads  # re-binds the value assigned above, adding an annotation
batch_size: int = 128
context_length: int = 4096
sample: bool = True

# Bedrock parameters
max_tokens: int = 4096
class CtransInitConfig_gpu:
    """Model-initialisation settings for running with GPU offload.

    Every argument defaults to the matching module-level constant, so
    ``CtransInitConfig_gpu()`` reflects the file-wide configuration.

    Args:
        last_n_tokens: Defaults to the module-level ``last_n_tokens``.
        seed: Defaults to the module-level ``seed``.
        n_threads: Defaults to the module-level ``threads``.
        n_batch: Defaults to the module-level ``batch_size``.
        n_ctx: Defaults to the module-level ``context_length``. It was
            previously taken from ``max_tokens`` (the Bedrock setting);
            both are 4096, so the effective default is unchanged.
        n_gpu_layers: Defaults to the module-level ``gpu_layers``, which is
            chosen from CUDA availability.
    """

    def __init__(self,
                 last_n_tokens=last_n_tokens,
                 seed=seed,
                 n_threads=threads,
                 n_batch=batch_size,
                 n_ctx=context_length,
                 n_gpu_layers=gpu_layers):
        self.last_n_tokens = last_n_tokens
        self.seed = seed
        self.n_threads = n_threads
        self.n_batch = n_batch
        self.n_ctx = n_ctx
        self.n_gpu_layers = n_gpu_layers
        # NOTE: a `stop` list attribute was sketched here previously but is
        # not set by this class.

    def update_gpu(self, new_value):
        """Replace the number of layers to offload to the GPU."""
        self.n_gpu_layers = new_value
class CtransInitConfig_cpu(CtransInitConfig_gpu):
    """CPU-only variant of the initialisation settings.

    Takes no arguments: all values come from the parent's defaults, except
    that no layers are offloaded to the GPU.
    """

    def __init__(self):
        # Same end state as building the parent with defaults and then
        # zeroing n_gpu_layers.
        super().__init__(n_gpu_layers=0)
# Module-level instances of the two initialisation configs, built once with
# their default values.
gpu_config = CtransInitConfig_gpu()
cpu_config = CtransInitConfig_cpu()
class CtransGenGenerationConfig:
    """Text-generation settings, defaulting to the module-level parameters.

    Args:
        temperature: Defaults to the module-level ``temperature``.
        top_k: Defaults to the module-level ``top_k``.
        top_p: Defaults to the module-level ``top_p``.
        repeat_penalty: Defaults to the module-level ``repetition_penalty``.
        seed: Defaults to the module-level ``seed``.
        stream: Defaults to the module-level ``stream``.
        max_tokens: Defaults to the module-level ``max_new_tokens`` (not the
            module-level ``max_tokens``, which this parameter shadows).
    """

    def __init__(
        self,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        repeat_penalty=repetition_penalty,
        seed=seed,
        stream=stream,
        max_tokens=max_new_tokens,
    ):
        # Sampling controls
        self.temperature = temperature
        self.top_k = top_k
        self.top_p = top_p
        self.repeat_penalty = repeat_penalty
        self.seed = seed
        # Output controls
        self.max_tokens = max_tokens
        self.stream = stream

    def update_temp(self, new_value):
        """Replace the sampling temperature."""
        self.temperature = new_value