# coding=utf-8
import os

from transformers import AutoTokenizer
from lyraChatGLM import GLM6B, FasterChatGLM

current_workdir = os.path.dirname(__file__)
MAX_OUT_LEN = 100
chatglm6b_dir = os.path.join(current_workdir, "models")
tokenizer = AutoTokenizer.from_pretrained(chatglm6b_dir, trust_remote_code=True)

# Prompt: "Why do we need to accelerate deep learning models?"
input_str = ["为什么我们需要对深度学习模型加速?"]
inputs = tokenizer(input_str, return_tensors="pt", padding=True)
input_ids = inputs.input_ids.to('cuda:0')

# Path to the pre-built kernel plan for the accelerated model.
plan_path = os.path.join(current_workdir, "models/glm6b-bs8.ftm")
# Optimized inference kernel for the chat model; the constants below
# (32 heads x 128 dims per head, 28 decoder layers, 150528-token vocab)
# match the ChatGLM-6B architecture.
kernel = GLM6B(plan_path=plan_path,
               batch_size=1,
               num_beams=1,
               use_cache=True,
               num_heads=32,
               emb_size_per_heads=128,
               decoder_layers=28,
               vocab_size=150528,
               max_seq_len=MAX_OUT_LEN)

chat = FasterChatGLM(model_dir=chatglm6b_dir, kernel=kernel).half().cuda()

# Generate a response with the accelerated model.
sample_output = chat.generate(inputs=input_ids, max_length=MAX_OUT_LEN)

# De-tokenize the model output back to text.
res = tokenizer.decode(sample_output[0], skip_special_tokens=True)
print(res)
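
# A minimal sketch for batched prompts: input_str may hold several strings,
# in which case all generated sequences can be decoded at once with the
# standard transformers batch_decode helper. This assumes the kernel's
# batch_size (and the plan file) were built for the chosen batch size.
all_res = tokenizer.batch_decode(sample_output, skip_special_tokens=True)
print("\n".join(all_res))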