"""Minimal Gradio chat UI backed by a Hugging Face Inference API model."""

from collections.abc import Iterator

import gradio as gr
from huggingface_hub import InferenceClient

# Switch to DialoGPT - no formatting token issues
client = InferenceClient("microsoft/DialoGPT-medium")

# Maps the "role" field of a messages-format history entry to the speaker
# label used in the plain-text prompt. Entries with other roles are skipped.
_ROLE_LABELS = {"user": "User", "assistant": "Assistant"}


def _history_lines(history) -> list[str]:
    """Flatten chat history into ``"Speaker: text"`` transcript lines.

    Two history shapes are accepted:

    * messages format -- dicts with ``"role"`` and ``"content"`` keys, which
      is what ``gr.ChatInterface(type="messages")`` passes to the callback;
    * legacy pairs -- ``(user_msg, assistant_msg)`` sequences.

    Dict entries whose role is not user/assistant, or whose content is not
    a string, are skipped. ``None`` halves of a legacy pair are skipped.
    """
    lines: list[str] = []
    for turn in history or []:
        if isinstance(turn, dict):
            speaker = _ROLE_LABELS.get(turn.get("role"))
            content = turn.get("content")
            if speaker is not None and isinstance(content, str):
                lines.append(f"{speaker}: {content}")
            continue

        # Legacy tuple-style history: one (user, assistant) pair per turn.
        user_msg, assistant_msg = turn
        if user_msg is not None:
            lines.append(f"User: {user_msg}")
        if assistant_msg is not None:
            lines.append(f"Assistant: {assistant_msg}")
    return lines


def _build_prompt(message: str, history) -> str:
    """Return the transcript prompt, ending with an open ``Assistant:`` turn."""
    lines = _history_lines(history)
    lines.append(f"User: {message}")
    # Left open so the model's completion is the assistant's reply.
    lines.append("Assistant:")
    return "\n".join(lines)


def respond(message: str, history) -> Iterator[str]:
    """Generate the assistant's reply to ``message``.

    Args:
        message: The latest user message.
        history: Previous turns, as supplied by ``gr.ChatInterface``; see
            ``_history_lines`` for the accepted shapes. May be empty/None.

    Yields:
        The generated reply with surrounding whitespace stripped (a single
        item; the response is not streamed token by token).
    """
    # Build conversation context
    conversation = _build_prompt(message, history)

    # NOTE(review): newer huggingface_hub releases appear to prefer `stop`
    # over `stop_sequences` -- confirm against the installed version.
    response = client.text_generation(
        conversation,
        max_new_tokens=100,
        temperature=0.1,
        return_full_text=False,
        # Cut generation off before the model starts writing the next
        # user turn itself.
        stop_sequences=["\nUser:", "User:"],
    )

    yield response.strip()


chatbot = gr.ChatInterface(respond, type="messages")
chatbot.launch()