| from ctransformers import AutoConfig, AutoModelForCausalLM |
|
|
| from modules import shared |
| from modules.callbacks import Iteratorize |
| from modules.logging_colors import logger |
|
|
|
|
class CtransformersModel:
    """Wrapper exposing a ctransformers causal LM through the project's
    standard model interface (encode/decode/generate/streaming)."""

    def __init__(self):
        pass

    @classmethod
    def from_pretrained(cls, path):
        """Alternate constructor: build the wrapper and load the model at *path*.

        Returns the wrapper twice — the caller apparently expects a
        (model, tokenizer) pair, and the model serves as its own tokenizer.
        """
        wrapper = cls()

        # Translate the shared CLI arguments into a ctransformers config.
        # threads == 0 is mapped to -1 (library convention for "all cores").
        config = AutoConfig.from_pretrained(
            str(path),
            threads=shared.args.threads if shared.args.threads != 0 else -1,
            gpu_layers=shared.args.n_gpu_layers,
            batch_size=shared.args.n_batch,
            context_length=shared.args.n_ctx,
            stream=True,
            mmap=not shared.args.no_mmap,
            mlock=shared.args.mlock
        )

        # With auto-detection, point the library at the containing directory
        # and let it infer the model type; otherwise pass both through as-is.
        if wrapper.model_type_is_auto():
            load_path, explicit_type = wrapper.model_dir(path), None
        else:
            load_path, explicit_type = path, shared.args.model_type

        wrapper.model = AutoModelForCausalLM.from_pretrained(
            str(load_path),
            model_type=explicit_type,
            config=config
        )

        logger.info(f'Using ctransformers model_type: {wrapper.model.model_type} for {wrapper.model.model_path}')
        return wrapper, wrapper

    def model_type_is_auto(self):
        """True when no explicit model type was requested on the command line."""
        return shared.args.model_type in (None, "Auto", "None")

    def model_dir(self, path):
        """Return the directory holding the model: the parent when *path* is a file."""
        return path.parent if path.is_file() else path

    def encode(self, string, **kwargs):
        # Extra keyword arguments are accepted for interface compatibility
        # with other model wrappers and ignored here.
        return self.model.tokenize(string)

    def decode(self, ids):
        return self.model.detokenize(ids)

    def generate(self, prompt, state, callback=None):
        """Run generation to completion, invoking *callback* once per token.

        *prompt* may be str or bytes; *state* supplies the sampling settings.
        Returns the full generated text.
        """
        if type(prompt) is not str:
            prompt = prompt.decode()

        token_stream = self.model(
            prompt=prompt,
            max_new_tokens=state['max_new_tokens'],
            temperature=state['temperature'],
            top_p=state['top_p'],
            top_k=state['top_k'],
            repetition_penalty=state['repetition_penalty'],
            last_n_tokens=state['repetition_penalty_range'],
            seed=int(state['seed'])
        )

        pieces = []
        for piece in token_stream:
            if callback:
                callback(piece)
            pieces.append(piece)

        return ''.join(pieces)

    def generate_with_streaming(self, *args, **kwargs):
        """Yield the cumulative reply after each newly generated token."""
        with Iteratorize(self.generate, args, kwargs, callback=None) as token_source:
            text_so_far = ''
            for fragment in token_source:
                text_so_far += fragment
                yield text_so_far
|
|