ChatGLM-6B

Runtime error

ChatGLM-6B / app.py

Lu Ken

use vicuna

fa02e71 over 1 year ago

1.64 kB

	from transformers import AutoModel, AutoTokenizer, LlamaTokenizer, LlamaForCausalLM
	import gradio as gr
	import torch

	# Hub id of the checkpoint; the tokenizer and the model are loaded from the same one.
	_CHECKPOINT = "lmsys/vicuna-7b-v1.3"

	# Run on the GPU when torch reports one, otherwise fall back to the CPU.
	DEVICE = "cpu"
	if torch.cuda.is_available():
	    DEVICE = "cuda"

	tokenizer = LlamaTokenizer.from_pretrained(_CHECKPOINT, trust_remote_code=True)

	# Load the weights, place them on DEVICE, then switch to evaluation mode.
	model = LlamaForCausalLM.from_pretrained(_CHECKPOINT, trust_remote_code=True)
	model = model.to(DEVICE)
	model = model.eval()

	def predict(input, history=None):
	    """Run one chat turn and return the transcript plus the token history.

	    Args:
	        input: The user's message for this turn. (The name shadows the
	            builtin; it is kept so existing callers keep working.)
	        history: Nested list of token ids as returned by a previous call
	            (the ``.tolist()`` of ``model.generate``), or ``None``/empty to
	            start a new conversation.

	    Returns:
	        A ``(response, history)`` tuple. ``response`` is a list of
	        ``(first_text, second_text)`` pairs built from consecutive
	        EOS-separated segments of the decoded conversation; ``history`` is
	        the full generated token-id sequence as a nested list, meant to be
	        passed back in on the next call.
	    """
	    if history is None:
	        history = []

	    eos_token = tokenizer.eos_token
	    new_user_input_ids = tokenizer.encode(input + eos_token, return_tensors='pt')
	    bot_input_ids = torch.cat([torch.LongTensor(history), new_user_input_ids], dim=-1)
	    # The prompt has to live on the same device as the model's weights,
	    # otherwise generate() fails when the model was moved to the GPU.
	    bot_input_ids = bot_input_ids.to(model.device)

	    history = model.generate(
	        bot_input_ids,
	        max_length=1000,
	        pad_token_id=tokenizer.eos_token_id,
	    ).tolist()

	    # Split on the tokenizer's own EOS marker - the same one appended to the
	    # user input above - rather than a hard-coded literal that may not exist
	    # in this tokenizer's vocabulary (which would leave the transcript empty).
	    segments = tokenizer.decode(history[0]).split(eos_token)

	    # Drop the BOS marker text (if the tokenizer defines one) and padding
	    # whitespace so markup-like tokens do not leak into the chat display.
	    bos_token = tokenizer.bos_token
	    if bos_token:
	        segments = [segment.replace(bos_token, "") for segment in segments]
	    segments = [segment.strip() for segment in segments]

	    # Pair consecutive segments; a trailing unpaired segment is ignored.
	    response = [
	        (segments[i], segments[i + 1])
	        for i in range(0, len(segments) - 1, 2)
	    ]
	    return response, history


	# Build the chat UI: a heading, the chat window, and a text box + button row.
	with gr.Blocks() as demo:
	    gr.Markdown('''## Confidential HuggingFace Runner
	''')

	    # Per-session token history handed to and returned from predict().
	    state = gr.State([])

	    chatbot = gr.Chatbot([], elem_id="chatbot")
	    chatbot = chatbot.style(height=400)

	    with gr.Row():
	        with gr.Column(scale=4):
	            txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter")
	            txt = txt.style(container=False)
	        with gr.Column(scale=1):
	            button = gr.Button("Generate")

	    # Pressing enter in the text box and clicking the button both run
	    # predict() with the same inputs and outputs.
	    for trigger in (txt.submit, button.click):
	        trigger(predict, [txt, state], [chatbot, state])

	# NOTE(review): the original indentation was lost; launch is assumed to sit
	# outside the Blocks context - confirm against the real file.
	demo.queue().launch(share=True, server_name="0.0.0.0")