import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import gradio as gr

model_id = "rinrikatoki/dorna-merged-4bit"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)
# Move the model onto the GPU when one is available; the inputs are sent to
# model.device below, so weights and input ids always share a device.
model = model.to("cuda" if torch.cuda.is_available() else "cpu")
model.eval()


def chat(message, history):
    # Rebuild the whole conversation as a single prompt string from the
    # (user, bot) pairs that gr.ChatInterface passes in.
    history = history or []
    prompt = ""
    for user, bot in history:
        prompt += f"<|user|>\n{user}\n<|assistant|>\n{bot}\n"
    prompt += f"<|user|>\n{message}\n<|assistant|>\n"

    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)

    # TextStreamer prints tokens to stdout as they are generated; it does not
    # stream into the Gradio UI.
    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    output = model.generate(
        input_ids,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.95,
        do_sample=True,
        streamer=streamer,
    )
    output_text = tokenizer.decode(output[0], skip_special_tokens=True)
    bot_reply = output_text.split("<|assistant|>")[-1].strip()

    # gr.ChatInterface expects its function to return just the reply string;
    # it tracks the history itself. Returning ("", history) here is the
    # pattern for a manual gr.Chatbot + gr.Textbox setup and raises an error
    # inside ChatInterface.
    return bot_reply


gr.ChatInterface(chat).launch()
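If the rinrikatoki/dorna-merged-4bit tokenizer ships a chat template (Dorna is Llama-3 based, so it likely does), the hardcoded <|user|>/<|assistant|> tags may not match what the model was trained on. A minimal sketch of building the inputs from the tokenizer's own template instead; build_inputs is a hypothetical helper, and the dict conversion assumes the tuple-style history above:

def build_inputs(message, history):
    # Convert the (user, bot) pairs into the role/content dicts that
    # apply_chat_template expects.
    messages = []
    for user, bot in history or []:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": bot})
    messages.append({"role": "user", "content": message})
    # Let the tokenizer insert the model's own special tokens and the
    # generation prompt, rather than hardcoding the tags by hand.
    return tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

Relatedly, decoding only the newly generated ids, tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True), is more robust than splitting on "<|assistant|>", since that split silently returns the whole text if skip_special_tokens strips the marker.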