| from ctransformers import AutoConfig, AutoModelForCausalLM |
|
|
| from modules import shared |
| from modules.callbacks import Iteratorize |
| from modules.logging_colors import logger |
|
|
|
|
class CtransformersModel:
    """Wrapper exposing a ctransformers causal LM through the project's
    standard model interface (encode/decode/generate/streaming)."""

    def __init__(self):
        pass

    @classmethod
    def from_pretrained(cls, path):
        """Alternate constructor: build the wrapper and load the model at *path*.

        Returns the wrapper twice — the caller apparently expects a
        (model, tokenizer) pair, and the model serves as its own tokenizer.
        """
        wrapper = cls()

        # Translate the shared CLI arguments into a ctransformers config.
        # threads == 0 is mapped to -1 (library convention for "all cores").
        config = AutoConfig.from_pretrained(
            str(path),
            threads=shared.args.threads if shared.args.threads != 0 else -1,
            gpu_layers=shared.args.n_gpu_layers,
            batch_size=shared.args.n_batch,
            context_length=shared.args.n_ctx,
            stream=True,
            mmap=not shared.args.no_mmap,
            mlock=shared.args.mlock
        )

        # With auto-detection, point the library at the containing directory
        # and let it infer the model type; otherwise pass both through as-is.
        if wrapper.model_type_is_auto():
            load_path, explicit_type = wrapper.model_dir(path), None
        else:
            load_path, explicit_type = path, shared.args.model_type

        wrapper.model = AutoModelForCausalLM.from_pretrained(
            str(load_path),
            model_type=explicit_type,
            config=config
        )

        logger.info(f'Using ctransformers model_type: {wrapper.model.model_type} for {wrapper.model.model_path}')
        return wrapper, wrapper

    def model_type_is_auto(self):
        """True when no explicit model type was requested on the command line."""
        return shared.args.model_type in (None, "Auto", "None")

    def model_dir(self, path):
        """Return the directory holding the model: the parent when *path* is a file."""
        return path.parent if path.is_file() else path

    def encode(self, string, **kwargs):
        # Extra keyword arguments are accepted for interface compatibility
        # with other model wrappers and ignored here.
        return self.model.tokenize(string)

    def decode(self, ids):
        return self.model.detokenize(ids)

    def generate(self, prompt, state, callback=None):
        """Run generation to completion, invoking *callback* once per token.

        *prompt* may be str or bytes; *state* supplies the sampling settings.
        Returns the full generated text.
        """
        if type(prompt) is not str:
            prompt = prompt.decode()

        token_stream = self.model(
            prompt=prompt,
            max_new_tokens=state['max_new_tokens'],
            temperature=state['temperature'],
            top_p=state['top_p'],
            top_k=state['top_k'],
            repetition_penalty=state['repetition_penalty'],
            last_n_tokens=state['repetition_penalty_range'],
            seed=int(state['seed'])
        )

        pieces = []
        for piece in token_stream:
            if callback:
                callback(piece)
            pieces.append(piece)

        return ''.join(pieces)

    def generate_with_streaming(self, *args, **kwargs):
        """Yield the cumulative reply after each newly generated token."""
        with Iteratorize(self.generate, args, kwargs, callback=None) as token_source:
            text_so_far = ''
            for fragment in token_source:
                text_so_far += fragment
                yield text_so_far
|
|