# dornatestfull / app.py
# Author: rinrikatoki — "Update app.py" (commit 7256add, verified)
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import gradio as gr
# 4-bit merged Dorna model hosted on the Hugging Face Hub.
model_id = "rinrikatoki/dorna-merged-4bit"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Half precision on GPU, full precision on CPU (fp16 matmuls are poorly
# supported on CPU).
device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
)
# Bug fix: the model was never moved to the GPU, so the fp16 weights chosen
# above would still have run on CPU. Move it, then switch to inference mode.
model = model.to(device).eval()
def chat(message, history):
    """Generate one assistant reply for a gr.ChatInterface turn.

    Parameters
    ----------
    message : str
        The new user message.
    history : list[tuple[str, str]] | None
        Previous (user, assistant) turns as supplied by gr.ChatInterface.

    Returns
    -------
    str
        The assistant's reply. gr.ChatInterface expects the chat function
        to return just the response string — the original ``return "",
        history`` made the UI display a tuple instead of the answer.
    """
    if history is None:
        history = []

    # Rebuild the full prompt from the conversation history using the
    # model's <|user|>/<|assistant|> turn markers.
    prompt = ""
    for user_turn, bot_turn in history:
        prompt += f"<|user|>\n{user_turn}\n<|assistant|>\n{bot_turn}\n"
    prompt += f"<|user|>\n{message}\n<|assistant|>\n"

    # Tokenize and move the whole encoding (input_ids AND attention_mask)
    # to the model's device; passing only input_ids triggers HF warnings
    # about a missing attention mask.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Streams tokens to the server's stdout as they are generated
    # (useful for server-side logging; the UI gets the final string).
    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # Inference only — disable autograd to save memory and time.
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.95,
            do_sample=True,
            streamer=streamer,
        )

    # Decode only the newly generated tokens. The original decoded the
    # whole sequence and split on "<|assistant|>", which breaks if the
    # model ever emits that marker inside its reply.
    new_tokens = output[0][inputs["input_ids"].shape[1]:]
    bot_reply = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    return bot_reply
# Build the chat UI around chat() and start the Gradio server;
# chat() is invoked once per user turn.
gr.ChatInterface(chat).launch()