import gradio as gr import torch from transformers import AutoModelForSeq2SeqLM, AutoTokenizer model_name = "facebook/blenderbot-400M-distill" device = "cuda" if torch.cuda.is_available() else "cpu" tokenizer = AutoTokenizer.from_pretrained(model_name) model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device) system_preamble = ( "You are a friendly, energetic motivational coach. " "Keep answers concise, positive, and actionable. " "When the user asks for exercises or steps, provide a short numbered list. " ) def generate_response(history, user_message): conversation = "" for u, r in history: conversation += "User: " + u + "\nCoach: " + r + "\n" conversation += "User: " + user_message + "\nCoach:" prompt = system_preamble + "\n" + conversation inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(device) out = model.generate(**inputs, max_new_tokens=256, do_sample=True, top_p=0.9, temperature=0.8) reply = tokenizer.decode(out[0], skip_special_tokens=True).strip() return reply def chat(user_message, chat_history): if chat_history is None: chat_history = [] reply = generate_response(chat_history, user_message) chat_history.append((user_message, reply)) return chat_history, chat_history with gr.Blocks(title="Motivational Coach") as demo: gr.Markdown("