"""Gradio chat UI that streams replies from a hosted Qwen chat model."""

import random

import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")


def echo(message, history):
    """Return a randomly chosen canned answer.

    Both arguments are accepted so the function matches the chat-handler
    signature, but neither is read.

    Args:
        message: The latest user message (unused).
        history: The prior conversation (unused).

    Returns:
        One of the canned answer strings, picked with ``random.choice``.
    """
    choices = [
        "yes",
        "no",
        "silly ahh question",
        "not likely",
        "ask again",
        "absolutely",
        "YASSS",
    ]
    yes_or_no = random.choice(choices)
    return yes_or_no


def respond(message, history):
    """Stream the model's reply to ``message`` as a growing string.

    Builds the request from a fixed system prompt, the prior ``history``
    (if any), and the new user message, then yields the accumulated reply
    after each streamed chunk.

    Args:
        message: The latest user message text.
        history: Prior conversation entries; they are appended to the
            request as-is, so they are assumed to already be role/content
            message dicts (the interface is created with
            ``type="messages"``).

    Yields:
        The reply text accumulated so far.
    """
    messages = [
        {"role": "system", "content": "You are a sassy chatbot from the 1800s."}
    ]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""
    # The loop variable is named ``chunk`` so it does not shadow the
    # ``message`` parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=100,
        stream=True,
    ):
        # Some streamed chunks carry no choices at all; there is nothing to
        # append for those.
        if not chunk.choices:
            continue
        # ``delta.content`` may be None (e.g. on role-only or final chunks);
        # treat that as an empty token instead of raising TypeError on ``+=``.
        token = chunk.choices[0].delta.content
        response += token or ""
        yield response


chatbot = gr.ChatInterface(respond, type="messages")
chatbot.launch()