import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import gradio as gr

model_id = "rinrikatoki/dorna-merged-4bit"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)
# Move the model onto the GPU when one is available; the inputs are sent to
# model.device below, so weights and input ids always share a device.
model = model.to("cuda" if torch.cuda.is_available() else "cpu")
model.eval()


def chat(message, history):
    # Rebuild the whole conversation as a single prompt string from the
    # (user, bot) pairs that gr.ChatInterface passes in.
    history = history or []
    prompt = ""
    for user, bot in history:
        prompt += f"<|user|>\n{user}\n<|assistant|>\n{bot}\n"
    prompt += f"<|user|>\n{message}\n<|assistant|>\n"

    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)

    # TextStreamer prints tokens to stdout as they are generated; it does not
    # stream into the Gradio UI.
    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    output = model.generate(
        input_ids,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.95,
        do_sample=True,
        streamer=streamer,
    )
    output_text = tokenizer.decode(output[0], skip_special_tokens=True)
    bot_reply = output_text.split("<|assistant|>")[-1].strip()

    # gr.ChatInterface expects its function to return just the reply string;
    # it tracks the history itself. Returning ("", history) here is the
    # pattern for a manual gr.Chatbot + gr.Textbox setup and raises an error
    # inside ChatInterface.
    return bot_reply


gr.ChatInterface(chat).launch()
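If the rinrikatoki/dorna-merged-4bit tokenizer ships a chat template (Dorna is Llama-3 based, so it likely does), the hardcoded <|user|>/<|assistant|> tags may not match what the model was trained on. A minimal sketch of building the inputs from the tokenizer's own template instead; build_inputs is a hypothetical helper, and the dict conversion assumes the tuple-style history above:

def build_inputs(message, history):
    # Convert the (user, bot) pairs into the role/content dicts that
    # apply_chat_template expects.
    messages = []
    for user, bot in history or []:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": bot})
    messages.append({"role": "user", "content": message})
    # Let the tokenizer insert the model's own special tokens and the
    # generation prompt, rather than hardcoding the tags by hand.
    return tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

Relatedly, decoding only the newly generated ids, tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True), is more robust than splitting on "<|assistant|>", since that split silently returns the whole text if skip_special_tokens strips the marker.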