"""Minimal Gradio chatbot backed by a Hugging Face Inference endpoint."""
import random

import gradio as gr
from huggingface_hub import InferenceClient

# Hosted LLM used for chat completions -- swap the model id to change the LLM.
client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")


def respond(message, history):
    """Stream a chat reply for *message*, yielding the partial text so far.

    Parameters
    ----------
    message : str
        The user's latest message.
    history : list[dict] | None
        Prior turns in OpenAI-style {"role": ..., "content": ...} dicts
        (gr.ChatInterface with type="messages" supplies this shape).

    Yields
    ------
    str
        The accumulated assistant reply after each streamed token.
    """
    # System prompt sets the persona -- edit the content to change the personality.
    messages = [{"role": "system", "content": "You are a sassy chatbot."}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""
    # Loop variable renamed to `chunk`: the original reused `message`,
    # shadowing the function parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=500,  # change reply length using max_tokens
        stream=True,
        # temperature=0.3,
        # top_p=0.3,
    ):
        token = chunk.choices[0].delta.content
        # Streamed deltas can be None (e.g. on the final chunk); skip those
        # rather than raising TypeError on `response += None`.
        if token:
            response += token
            yield response
    # BUGFIX: the original did response['choices'][0]['message']['content'],
    # but `response` is a plain str here, so that always raised TypeError.
    # The accumulated text itself is what we want to log.
    print(response.strip())


def random_message(message, history):
    """Magic-8-ball style responder: ignore the input, return a canned answer.

    Drop-in alternative to `respond` for gr.ChatInterface (same signature).
    """
    choices = [
        "yes.", "no.", "it is certain", "without a doubt", "outlook good",
        "ask again later", "better not tell you now", "very doubtful",
        "don't count on it", "my sources say no", "outlook not so good",
        "very doubtful", "reply hazy, try again", "cannot predict now",
    ]
    chat_answer = random.choice(choices)
    return chat_answer


print("Hello, World!")  # startup marker

# Chat UI wired to the streaming responder; "messages" keeps history in
# OpenAI-style role/content dicts.
chatbot = gr.ChatInterface(respond, type="messages")
chatbot.launch(debug=True)